1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/CodeGen/CallingConvLower.h"
16 #include "llvm/CodeGen/CodeGenCommonISel.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineJumpTableInfo.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/SelectionDAG.h"
22 #include "llvm/CodeGen/TargetRegisterInfo.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalVariable.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCExpr.h"
29 #include "llvm/Support/DivisionByConstantInfo.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/KnownBits.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/Target/TargetMachine.h"
34 #include <cctype>
35 using namespace llvm;
36 
/// NOTE: The TargetMachine owns TLOF.
/// All shared lowering state lives in TargetLoweringBase; this constructor
/// only forwards the TargetMachine to it.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
40 
/// Return a human-readable name for the given target-specific ISD opcode.
/// This base implementation knows no target-specific nodes and always
/// returns nullptr; targets with custom opcodes are expected to override it.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
44 
/// Return true when position-independent code is being generated.
/// Delegates directly to the TargetMachine's PIC setting.
bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
48 
49 /// Check whether a given call node is in tail position within its function. If
50 /// so, it sets Chain to the input chain of the tail call.
51 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
52                                           SDValue &Chain) const {
53   const Function &F = DAG.getMachineFunction().getFunction();
54 
55   // First, check if tail calls have been disabled in this function.
56   if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
57     return false;
58 
59   // Conservatively require the attributes of the call to match those of
60   // the return. Ignore following attributes because they don't affect the
61   // call sequence.
62   AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
63   for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
64                            Attribute::DereferenceableOrNull, Attribute::NoAlias,
65                            Attribute::NonNull, Attribute::NoUndef})
66     CallerAttrs.removeAttribute(Attr);
67 
68   if (CallerAttrs.hasAttributes())
69     return false;
70 
71   // It's not safe to eliminate the sign / zero extension of the return value.
72   if (CallerAttrs.contains(Attribute::ZExt) ||
73       CallerAttrs.contains(Attribute::SExt))
74     return false;
75 
76   // Check if the only use is a function return node.
77   return isUsedByReturnOnly(Node, Chain);
78 }
79 
80 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
81     const uint32_t *CallerPreservedMask,
82     const SmallVectorImpl<CCValAssign> &ArgLocs,
83     const SmallVectorImpl<SDValue> &OutVals) const {
84   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
85     const CCValAssign &ArgLoc = ArgLocs[I];
86     if (!ArgLoc.isRegLoc())
87       continue;
88     MCRegister Reg = ArgLoc.getLocReg();
89     // Only look at callee saved registers.
90     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
91       continue;
92     // Check that we pass the value used for the caller.
93     // (We look for a CopyFromReg reading a virtual register that is used
94     //  for the function live-in value of register Reg)
95     SDValue Value = OutVals[I];
96     if (Value->getOpcode() == ISD::AssertZext)
97       Value = Value.getOperand(0);
98     if (Value->getOpcode() != ISD::CopyFromReg)
99       return false;
100     Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
101     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
102       return false;
103   }
104   return true;
105 }
106 
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  // Mirror each IR-level parameter attribute of argument ArgIdx into the
  // corresponding lowering flag.
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  // At most one of the "indirect" ABI attributes may be present on a single
  // parameter; each one below overwrites IndirectType with its pointee type.
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    // An explicit stackalign takes precedence over the byval's own alignment.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
139 
140 /// Generate a libcall taking the given operands as arguments and returning a
141 /// result of type RetVT.
142 std::pair<SDValue, SDValue>
143 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
144                             ArrayRef<SDValue> Ops,
145                             MakeLibCallOptions CallOptions,
146                             const SDLoc &dl,
147                             SDValue InChain) const {
148   if (!InChain)
149     InChain = DAG.getEntryNode();
150 
151   TargetLowering::ArgListTy Args;
152   Args.reserve(Ops.size());
153 
154   TargetLowering::ArgListEntry Entry;
155   for (unsigned i = 0; i < Ops.size(); ++i) {
156     SDValue NewOp = Ops[i];
157     Entry.Node = NewOp;
158     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
159     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
160                                                  CallOptions.IsSExt);
161     Entry.IsZExt = !Entry.IsSExt;
162 
163     if (CallOptions.IsSoften &&
164         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
165       Entry.IsSExt = Entry.IsZExt = false;
166     }
167     Args.push_back(Entry);
168   }
169 
170   if (LC == RTLIB::UNKNOWN_LIBCALL)
171     report_fatal_error("Unsupported library call operation!");
172   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
173                                          getPointerTy(DAG.getDataLayout()));
174 
175   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
176   TargetLowering::CallLoweringInfo CLI(DAG);
177   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
178   bool zeroExtend = !signExtend;
179 
180   if (CallOptions.IsSoften &&
181       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
182     signExtend = zeroExtend = false;
183   }
184 
185   CLI.setDebugLoc(dl)
186       .setChain(InChain)
187       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
188       .setNoReturn(CallOptions.DoesNotReturn)
189       .setDiscardResult(!CallOptions.IsReturnValueUsed)
190       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
191       .setSExtResult(signExtend)
192       .setZExtResult(zeroExtend);
193   return LowerCallTo(CLI);
194 }
195 
/// Break a memory operation of Op.size() bytes into a sequence of load/store
/// value types, appended to MemOps. Returns false if more than Limit
/// operations would be required or the alignment constraints cannot be met.
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // Conservatively reject memcpys whose source is less aligned than a fixed
  // destination alignment.
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Ask the target for a preferred type; MVT::Other means "no preference".
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    // Shrink VT (or allow an overlapping access) until it fits the remainder.
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        // Drop to a plain integer type for the tail of a vector/FP sequence.
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Walk down the integer types until a safe one is found, never going
        // below i8.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    // Respect the caller-imposed cap on the number of operations.
    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
284 
285 /// Soften the operands of a comparison. This code is shared among BR_CC,
286 /// SELECT_CC, and SETCC handlers.
287 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
288                                          SDValue &NewLHS, SDValue &NewRHS,
289                                          ISD::CondCode &CCCode,
290                                          const SDLoc &dl, const SDValue OldLHS,
291                                          const SDValue OldRHS) const {
292   SDValue Chain;
293   return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
294                              OldRHS, Chain);
295 }
296 
/// Soften an FP comparison by expanding it into one or two soft-float
/// comparison libcalls plus an integer setcc against their results. On
/// return, NewLHS/NewRHS/CCCode describe the replacement integer comparison;
/// Chain (if non-null) is updated with the call chain.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  // LC1 is always required; LC2 is used for predicates (SETONE/SETUEQ) that
  // need the ordered test combined with an equality test. ShouldInvertCC
  // records that the chosen libcall computes the negated predicate.
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO = !SETUO, so run the unordered test and invert the result.
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUEQ:
    // Two calls: unordered test (LC1) combined with ordered equality (LC2).
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparions lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  // First call: the comparison becomes "libcall(result) CC 0".
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-call case: evaluate both predicates, then AND (inverted) or OR
    // the setcc results, and join the chains with a TokenFactor.
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    // The combined result is a complete comparison; no RHS remains.
    NewRHS = SDValue();
  }
}
436 
437 /// Return the entry encoding for a jump table in the current function. The
438 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
439 unsigned TargetLowering::getJumpTableEncoding() const {
440   // In non-pic modes, just use the address of a block.
441   if (!isPositionIndependent())
442     return MachineJumpTableInfo::EK_BlockAddress;
443 
444   // In PIC mode, if the target supports a GPRel32 directive, use it.
445   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
446     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
447 
448   // Otherwise, use a label difference.
449   return MachineJumpTableInfo::EK_LabelDifference32;
450 }
451 
452 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
453                                                  SelectionDAG &DAG) const {
454   // If our PIC model is GP relative, use the global offset table as the base.
455   unsigned JTEncoding = getJumpTableEncoding();
456 
457   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
458       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
459     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
460 
461   return Table;
462 }
463 
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,MCContext &Ctx) const{
  // The normal PIC reloc base is the label at the start of the jump table:
  // a symbol reference to this function's symbol for jump table index JTI.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
472 
473 bool
474 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
475   const TargetMachine &TM = getTargetMachine();
476   const GlobalValue *GV = GA->getGlobal();
477 
478   // If the address is not even local to this DSO we will have to load it from
479   // a got and then add the offset.
480   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
481     return false;
482 
483   // If the code is position independent we will have to add a base register.
484   if (isPositionIndependent())
485     return false;
486 
487   // Otherwise we can do it.
488   return true;
489 }
490 
491 //===----------------------------------------------------------------------===//
492 //  Optimization Methods
493 //===----------------------------------------------------------------------===//
494 
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    // The target queued its replacement in TLO.New; success iff it is set.
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    // Only handle a plain (non-opaque) constant on the RHS.
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    // Some constant bits fall outside the demanded mask: rebuild the node
    // with the constant restricted to the demanded bits.
    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
538 
539 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
540                                             const APInt &DemandedBits,
541                                             TargetLoweringOpt &TLO) const {
542   EVT VT = Op.getValueType();
543   APInt DemandedElts = VT.isVector()
544                            ? APInt::getAllOnes(VT.getVectorNumElements())
545                            : APInt(1, 1);
546   return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
547 }
548 
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  // Round the starting candidate width up to a power of two.
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      // Truncate both operands, perform the op narrow, then widen the result.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
595 
596 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
597                                           DAGCombinerInfo &DCI) const {
598   SelectionDAG &DAG = DCI.DAG;
599   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
600                         !DCI.isBeforeLegalizeOps());
601   KnownBits Known;
602 
603   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
604   if (Simplified) {
605     DCI.AddToWorklist(Op.getNode());
606     DCI.CommitTargetLoweringOpt(TLO);
607   }
608   return Simplified;
609 }
610 
611 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
612                                           const APInt &DemandedElts,
613                                           DAGCombinerInfo &DCI) const {
614   SelectionDAG &DAG = DCI.DAG;
615   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
616                         !DCI.isBeforeLegalizeOps());
617   KnownBits Known;
618 
619   bool Simplified =
620       SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
621   if (Simplified) {
622     DCI.AddToWorklist(Op.getNode());
623     DCI.CommitTargetLoweringOpt(TLO);
624   }
625   return Simplified;
626 }
627 
/// Overload without an element mask: demand all elements and forward to the
/// main implementation. Scalable vectors are not handled and report no
/// simplification with unknown bits.
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (VT.isScalableVector()) {
    // Pretend we don't know anything for now.
    Known = KnownBits(DemandedBits.getBitWidth());
    return false;
  }

  // Demand every element of a fixed vector; scalars use a one-bit mask.
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}
650 
651 // TODO: Can we merge SelectionDAG::GetDemandedBits into this?
652 // TODO: Under what circumstances can we create nodes? Constant folding?
653 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
654     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
655     SelectionDAG &DAG, unsigned Depth) const {
656   // Limit search depth.
657   if (Depth >= SelectionDAG::MaxRecursionDepth)
658     return SDValue();
659 
660   // Ignore UNDEFs.
661   if (Op.isUndef())
662     return SDValue();
663 
664   // Not demanding any bits/elts from Op.
665   if (DemandedBits == 0 || DemandedElts == 0)
666     return DAG.getUNDEF(Op.getValueType());
667 
668   bool IsLE = DAG.getDataLayout().isLittleEndian();
669   unsigned NumElts = DemandedElts.getBitWidth();
670   unsigned BitWidth = DemandedBits.getBitWidth();
671   KnownBits LHSKnown, RHSKnown;
672   switch (Op.getOpcode()) {
673   case ISD::BITCAST: {
674     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
675     EVT SrcVT = Src.getValueType();
676     EVT DstVT = Op.getValueType();
677     if (SrcVT == DstVT)
678       return Src;
679 
680     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
681     unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
682     if (NumSrcEltBits == NumDstEltBits)
683       if (SDValue V = SimplifyMultipleUseDemandedBits(
684               Src, DemandedBits, DemandedElts, DAG, Depth + 1))
685         return DAG.getBitcast(DstVT, V);
686 
687     if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
688       unsigned Scale = NumDstEltBits / NumSrcEltBits;
689       unsigned NumSrcElts = SrcVT.getVectorNumElements();
690       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
691       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
692       for (unsigned i = 0; i != Scale; ++i) {
693         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
694         unsigned BitOffset = EltOffset * NumSrcEltBits;
695         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
696         if (!Sub.isZero()) {
697           DemandedSrcBits |= Sub;
698           for (unsigned j = 0; j != NumElts; ++j)
699             if (DemandedElts[j])
700               DemandedSrcElts.setBit((j * Scale) + i);
701         }
702       }
703 
704       if (SDValue V = SimplifyMultipleUseDemandedBits(
705               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
706         return DAG.getBitcast(DstVT, V);
707     }
708 
709     // TODO - bigendian once we have test coverage.
710     if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
711       unsigned Scale = NumSrcEltBits / NumDstEltBits;
712       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
713       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
714       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
715       for (unsigned i = 0; i != NumElts; ++i)
716         if (DemandedElts[i]) {
717           unsigned Offset = (i % Scale) * NumDstEltBits;
718           DemandedSrcBits.insertBits(DemandedBits, Offset);
719           DemandedSrcElts.setBit(i / Scale);
720         }
721 
722       if (SDValue V = SimplifyMultipleUseDemandedBits(
723               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
724         return DAG.getBitcast(DstVT, V);
725     }
726 
727     break;
728   }
729   case ISD::AND: {
730     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
731     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
732 
733     // If all of the demanded bits are known 1 on one side, return the other.
734     // These bits cannot contribute to the result of the 'and' in this
735     // context.
736     if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
737       return Op.getOperand(0);
738     if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
739       return Op.getOperand(1);
740     break;
741   }
742   case ISD::OR: {
743     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
744     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
745 
746     // If all of the demanded bits are known zero on one side, return the
747     // other.  These bits cannot contribute to the result of the 'or' in this
748     // context.
749     if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
750       return Op.getOperand(0);
751     if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
752       return Op.getOperand(1);
753     break;
754   }
755   case ISD::XOR: {
756     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
757     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
758 
759     // If all of the demanded bits are known zero on one side, return the
760     // other.
761     if (DemandedBits.isSubsetOf(RHSKnown.Zero))
762       return Op.getOperand(0);
763     if (DemandedBits.isSubsetOf(LHSKnown.Zero))
764       return Op.getOperand(1);
765     break;
766   }
767   case ISD::SHL: {
768     // If we are only demanding sign bits then we can use the shift source
769     // directly.
770     if (const APInt *MaxSA =
771             DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
772       SDValue Op0 = Op.getOperand(0);
773       unsigned ShAmt = MaxSA->getZExtValue();
774       unsigned NumSignBits =
775           DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
776       unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
777       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
778         return Op0;
779     }
780     break;
781   }
782   case ISD::SETCC: {
783     SDValue Op0 = Op.getOperand(0);
784     SDValue Op1 = Op.getOperand(1);
785     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
786     // If (1) we only need the sign-bit, (2) the setcc operands are the same
787     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
788     // -1, we may be able to bypass the setcc.
789     if (DemandedBits.isSignMask() &&
790         Op0.getScalarValueSizeInBits() == BitWidth &&
791         getBooleanContents(Op0.getValueType()) ==
792             BooleanContent::ZeroOrNegativeOneBooleanContent) {
793       // If we're testing X < 0, then this compare isn't needed - just use X!
794       // FIXME: We're limiting to integer types here, but this should also work
795       // if we don't care about FP signed-zero. The use of SETLT with FP means
796       // that we don't care about NaNs.
797       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
798           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
799         return Op0;
800     }
801     break;
802   }
803   case ISD::SIGN_EXTEND_INREG: {
804     // If none of the extended bits are demanded, eliminate the sextinreg.
805     SDValue Op0 = Op.getOperand(0);
806     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
807     unsigned ExBits = ExVT.getScalarSizeInBits();
808     if (DemandedBits.getActiveBits() <= ExBits)
809       return Op0;
810     // If the input is already sign extended, just drop the extension.
811     unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
812     if (NumSignBits >= (BitWidth - ExBits + 1))
813       return Op0;
814     break;
815   }
816   case ISD::ANY_EXTEND_VECTOR_INREG:
817   case ISD::SIGN_EXTEND_VECTOR_INREG:
818   case ISD::ZERO_EXTEND_VECTOR_INREG: {
819     // If we only want the lowest element and none of extended bits, then we can
820     // return the bitcasted source vector.
821     SDValue Src = Op.getOperand(0);
822     EVT SrcVT = Src.getValueType();
823     EVT DstVT = Op.getValueType();
824     if (IsLE && DemandedElts == 1 &&
825         DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
826         DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
827       return DAG.getBitcast(DstVT, Src);
828     }
829     break;
830   }
831   case ISD::INSERT_VECTOR_ELT: {
832     // If we don't demand the inserted element, return the base vector.
833     SDValue Vec = Op.getOperand(0);
834     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
835     EVT VecVT = Vec.getValueType();
836     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
837         !DemandedElts[CIdx->getZExtValue()])
838       return Vec;
839     break;
840   }
841   case ISD::INSERT_SUBVECTOR: {
842     SDValue Vec = Op.getOperand(0);
843     SDValue Sub = Op.getOperand(1);
844     uint64_t Idx = Op.getConstantOperandVal(2);
845     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
846     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
847     // If we don't demand the inserted subvector, return the base vector.
848     if (DemandedSubElts == 0)
849       return Vec;
850     // If this simply widens the lowest subvector, see if we can do it earlier.
851     if (Idx == 0 && Vec.isUndef()) {
852       if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
853               Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
854         return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
855                            Op.getOperand(0), NewSub, Op.getOperand(2));
856     }
857     break;
858   }
859   case ISD::VECTOR_SHUFFLE: {
860     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
861 
862     // If all the demanded elts are from one operand and are inline,
863     // then we can use the operand directly.
864     bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
865     for (unsigned i = 0; i != NumElts; ++i) {
866       int M = ShuffleMask[i];
867       if (M < 0 || !DemandedElts[i])
868         continue;
869       AllUndef = false;
870       IdentityLHS &= (M == (int)i);
871       IdentityRHS &= ((M - NumElts) == i);
872     }
873 
874     if (AllUndef)
875       return DAG.getUNDEF(Op.getValueType());
876     if (IdentityLHS)
877       return Op.getOperand(0);
878     if (IdentityRHS)
879       return Op.getOperand(1);
880     break;
881   }
882   default:
883     if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
884       if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
885               Op, DemandedBits, DemandedElts, DAG, Depth))
886         return V;
887     break;
888   }
889   return SDValue();
890 }
891 
892 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
893     SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
894     unsigned Depth) const {
895   EVT VT = Op.getValueType();
896   APInt DemandedElts = VT.isVector()
897                            ? APInt::getAllOnes(VT.getVectorNumElements())
898                            : APInt(1, 1);
899   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
900                                          Depth);
901 }
902 
903 SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
904     SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
905     unsigned Depth) const {
906   APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
907   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
908                                          Depth);
909 }
910 
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
//      or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
//
// \p Op must be an SRL or SRA node (asserted below). On success, returns a
// replacement of the form ext(avg*(trunc(A), trunc(B))) in a narrower legal
// type; otherwise returns an empty SDValue. DemandedBits/DemandedElts come
// from the SimplifyDemandedBits caller and scope the known-bits queries.
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts,
                                 unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as an avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  // Given the three operands of the nested adds, look for the constant-1
  // operand; if found, record the remaining two as the averaged values.
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op2;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      return true;
    }
    return false;
  };
  // IsCeil is set when one of the nested-add forms above matched, i.e. a +1
  // is folded into the sum before the shift (rounding-up average).
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bit for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  // Count of redundant high bits (sign copies or zeros) common to both
  // operands; used below to pick the narrowest AVG type.
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  // Adding two values that each have N common sign bits leaves at least N-1,
  // so conservatively drop one from the minimum of the two operands.
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    // Prefer the unsigned form when more zero bits than sign bits are known:
    // the larger known-bit count allows a narrower AVG type below.
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    // srl of a value with known sign bits acts like sra provided the sign
    // bit itself is not demanded.
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits (clamped to at least i8).
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth));
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT))
    return SDValue();

  // Build trunc(A)/trunc(B) into the narrow type, average there, then
  // sign/zero-extend back to the original type.
  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
                  DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
  return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
                     ResultAVG);
}
1036 
1037 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1038 /// result of Op are ever used downstream. If we can use this information to
1039 /// simplify Op, create a new simplified DAG node and return true, returning the
1040 /// original and new nodes in Old and New. Otherwise, analyze the expression and
1041 /// return a mask of Known bits for the expression (used to simplify the
1042 /// caller).  The Known bits may only be accurate for those bits in the
1043 /// OriginalDemandedBits and OriginalDemandedElts.
1044 bool TargetLowering::SimplifyDemandedBits(
1045     SDValue Op, const APInt &OriginalDemandedBits,
1046     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1047     unsigned Depth, bool AssumeSingleUse) const {
1048   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1049   assert(Op.getScalarValueSizeInBits() == BitWidth &&
1050          "Mask size mismatches value type size!");
1051 
1052   // Don't know anything.
1053   Known = KnownBits(BitWidth);
1054 
1055   // TODO: We can probably do more work on calculating the known bits and
1056   // simplifying the operations for scalable vectors, but for now we just
1057   // bail out.
1058   if (Op.getValueType().isScalableVector())
1059     return false;
1060 
1061   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1062   unsigned NumElts = OriginalDemandedElts.getBitWidth();
1063   assert((!Op.getValueType().isVector() ||
1064           NumElts == Op.getValueType().getVectorNumElements()) &&
1065          "Unexpected vector size");
1066 
1067   APInt DemandedBits = OriginalDemandedBits;
1068   APInt DemandedElts = OriginalDemandedElts;
1069   SDLoc dl(Op);
1070   auto &DL = TLO.DAG.getDataLayout();
1071 
1072   // Undef operand.
1073   if (Op.isUndef())
1074     return false;
1075 
1076   if (Op.getOpcode() == ISD::Constant) {
1077     // We know all of the bits for a constant!
1078     Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
1079     return false;
1080   }
1081 
1082   if (Op.getOpcode() == ISD::ConstantFP) {
1083     // We know all of the bits for a floating point constant!
1084     Known = KnownBits::makeConstant(
1085         cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1086     return false;
1087   }
1088 
1089   // Other users may use these bits.
1090   EVT VT = Op.getValueType();
1091   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
1092     if (Depth != 0) {
1093       // If not at the root, Just compute the Known bits to
1094       // simplify things downstream.
1095       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1096       return false;
1097     }
1098     // If this is the root being simplified, allow it to have multiple uses,
1099     // just set the DemandedBits/Elts to all bits.
1100     DemandedBits = APInt::getAllOnes(BitWidth);
1101     DemandedElts = APInt::getAllOnes(NumElts);
1102   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1103     // Not demanding any bits/elts from Op.
1104     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1105   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1106     // Limit search depth.
1107     return false;
1108   }
1109 
1110   KnownBits Known2;
1111   switch (Op.getOpcode()) {
1112   case ISD::TargetConstant:
1113     llvm_unreachable("Can't simplify this node");
1114   case ISD::SCALAR_TO_VECTOR: {
1115     if (!DemandedElts[0])
1116       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1117 
1118     KnownBits SrcKnown;
1119     SDValue Src = Op.getOperand(0);
1120     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1121     APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1122     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1123       return true;
1124 
1125     // Upper elements are undef, so only get the knownbits if we just demand
1126     // the bottom element.
1127     if (DemandedElts == 1)
1128       Known = SrcKnown.anyextOrTrunc(BitWidth);
1129     break;
1130   }
1131   case ISD::BUILD_VECTOR:
1132     // Collect the known bits that are shared by every demanded element.
1133     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1134     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1135     return false; // Don't fall through, will infinitely loop.
1136   case ISD::LOAD: {
1137     auto *LD = cast<LoadSDNode>(Op);
1138     if (getTargetConstantFromLoad(LD)) {
1139       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1140       return false; // Don't fall through, will infinitely loop.
1141     }
1142     if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1143       // If this is a ZEXTLoad and we are looking at the loaded value.
1144       EVT MemVT = LD->getMemoryVT();
1145       unsigned MemBits = MemVT.getScalarSizeInBits();
1146       Known.Zero.setBitsFrom(MemBits);
1147       return false; // Don't fall through, will infinitely loop.
1148     }
1149     break;
1150   }
1151   case ISD::INSERT_VECTOR_ELT: {
1152     SDValue Vec = Op.getOperand(0);
1153     SDValue Scl = Op.getOperand(1);
1154     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1155     EVT VecVT = Vec.getValueType();
1156 
1157     // If index isn't constant, assume we need all vector elements AND the
1158     // inserted element.
1159     APInt DemandedVecElts(DemandedElts);
1160     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1161       unsigned Idx = CIdx->getZExtValue();
1162       DemandedVecElts.clearBit(Idx);
1163 
1164       // Inserted element is not required.
1165       if (!DemandedElts[Idx])
1166         return TLO.CombineTo(Op, Vec);
1167     }
1168 
1169     KnownBits KnownScl;
1170     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1171     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1172     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1173       return true;
1174 
1175     Known = KnownScl.anyextOrTrunc(BitWidth);
1176 
1177     KnownBits KnownVec;
1178     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1179                              Depth + 1))
1180       return true;
1181 
1182     if (!!DemandedVecElts)
1183       Known = KnownBits::commonBits(Known, KnownVec);
1184 
1185     return false;
1186   }
1187   case ISD::INSERT_SUBVECTOR: {
1188     // Demand any elements from the subvector and the remainder from the src its
1189     // inserted into.
1190     SDValue Src = Op.getOperand(0);
1191     SDValue Sub = Op.getOperand(1);
1192     uint64_t Idx = Op.getConstantOperandVal(2);
1193     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1194     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1195     APInt DemandedSrcElts = DemandedElts;
1196     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1197 
1198     KnownBits KnownSub, KnownSrc;
1199     if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1200                              Depth + 1))
1201       return true;
1202     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1203                              Depth + 1))
1204       return true;
1205 
1206     Known.Zero.setAllBits();
1207     Known.One.setAllBits();
1208     if (!!DemandedSubElts)
1209       Known = KnownBits::commonBits(Known, KnownSub);
1210     if (!!DemandedSrcElts)
1211       Known = KnownBits::commonBits(Known, KnownSrc);
1212 
1213     // Attempt to avoid multi-use src if we don't need anything from it.
1214     if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1215         !DemandedSrcElts.isAllOnes()) {
1216       SDValue NewSub = SimplifyMultipleUseDemandedBits(
1217           Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1218       SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1219           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1220       if (NewSub || NewSrc) {
1221         NewSub = NewSub ? NewSub : Sub;
1222         NewSrc = NewSrc ? NewSrc : Src;
1223         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1224                                         Op.getOperand(2));
1225         return TLO.CombineTo(Op, NewOp);
1226       }
1227     }
1228     break;
1229   }
1230   case ISD::EXTRACT_SUBVECTOR: {
1231     // Offset the demanded elts by the subvector index.
1232     SDValue Src = Op.getOperand(0);
1233     if (Src.getValueType().isScalableVector())
1234       break;
1235     uint64_t Idx = Op.getConstantOperandVal(1);
1236     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1237     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1238 
1239     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1240                              Depth + 1))
1241       return true;
1242 
1243     // Attempt to avoid multi-use src if we don't need anything from it.
1244     if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1245       SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1246           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1247       if (DemandedSrc) {
1248         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1249                                         Op.getOperand(1));
1250         return TLO.CombineTo(Op, NewOp);
1251       }
1252     }
1253     break;
1254   }
1255   case ISD::CONCAT_VECTORS: {
1256     Known.Zero.setAllBits();
1257     Known.One.setAllBits();
1258     EVT SubVT = Op.getOperand(0).getValueType();
1259     unsigned NumSubVecs = Op.getNumOperands();
1260     unsigned NumSubElts = SubVT.getVectorNumElements();
1261     for (unsigned i = 0; i != NumSubVecs; ++i) {
1262       APInt DemandedSubElts =
1263           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1264       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1265                                Known2, TLO, Depth + 1))
1266         return true;
1267       // Known bits are shared by every demanded subvector element.
1268       if (!!DemandedSubElts)
1269         Known = KnownBits::commonBits(Known, Known2);
1270     }
1271     break;
1272   }
1273   case ISD::VECTOR_SHUFFLE: {
1274     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1275 
1276     // Collect demanded elements from shuffle operands..
1277     APInt DemandedLHS(NumElts, 0);
1278     APInt DemandedRHS(NumElts, 0);
1279     for (unsigned i = 0; i != NumElts; ++i) {
1280       if (!DemandedElts[i])
1281         continue;
1282       int M = ShuffleMask[i];
1283       if (M < 0) {
1284         // For UNDEF elements, we don't know anything about the common state of
1285         // the shuffle result.
1286         DemandedLHS.clearAllBits();
1287         DemandedRHS.clearAllBits();
1288         break;
1289       }
1290       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
1291       if (M < (int)NumElts)
1292         DemandedLHS.setBit(M);
1293       else
1294         DemandedRHS.setBit(M - NumElts);
1295     }
1296 
1297     if (!!DemandedLHS || !!DemandedRHS) {
1298       SDValue Op0 = Op.getOperand(0);
1299       SDValue Op1 = Op.getOperand(1);
1300 
1301       Known.Zero.setAllBits();
1302       Known.One.setAllBits();
1303       if (!!DemandedLHS) {
1304         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1305                                  Depth + 1))
1306           return true;
1307         Known = KnownBits::commonBits(Known, Known2);
1308       }
1309       if (!!DemandedRHS) {
1310         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1311                                  Depth + 1))
1312           return true;
1313         Known = KnownBits::commonBits(Known, Known2);
1314       }
1315 
1316       // Attempt to avoid multi-use ops if we don't need anything from them.
1317       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1318           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1319       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1320           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1321       if (DemandedOp0 || DemandedOp1) {
1322         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1323         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1324         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1325         return TLO.CombineTo(Op, NewOp);
1326       }
1327     }
1328     break;
1329   }
1330   case ISD::AND: {
1331     SDValue Op0 = Op.getOperand(0);
1332     SDValue Op1 = Op.getOperand(1);
1333 
1334     // If the RHS is a constant, check to see if the LHS would be zero without
1335     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1336     // simplify the LHS, here we're using information from the LHS to simplify
1337     // the RHS.
1338     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1339       // Do not increment Depth here; that can cause an infinite loop.
1340       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1341       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1342       if ((LHSKnown.Zero & DemandedBits) ==
1343           (~RHSC->getAPIntValue() & DemandedBits))
1344         return TLO.CombineTo(Op, Op0);
1345 
1346       // If any of the set bits in the RHS are known zero on the LHS, shrink
1347       // the constant.
1348       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1349                                  DemandedElts, TLO))
1350         return true;
1351 
1352       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1353       // constant, but if this 'and' is only clearing bits that were just set by
1354       // the xor, then this 'and' can be eliminated by shrinking the mask of
1355       // the xor. For example, for a 32-bit X:
1356       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1357       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1358           LHSKnown.One == ~RHSC->getAPIntValue()) {
1359         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1360         return TLO.CombineTo(Op, Xor);
1361       }
1362     }
1363 
1364     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1365                              Depth + 1))
1366       return true;
1367     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1368     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1369                              Known2, TLO, Depth + 1))
1370       return true;
1371     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1372 
1373     // Attempt to avoid multi-use ops if we don't need anything from them.
1374     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1375       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1376           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1377       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1378           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1379       if (DemandedOp0 || DemandedOp1) {
1380         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1381         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1382         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1383         return TLO.CombineTo(Op, NewOp);
1384       }
1385     }
1386 
1387     // If all of the demanded bits are known one on one side, return the other.
1388     // These bits cannot contribute to the result of the 'and'.
1389     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1390       return TLO.CombineTo(Op, Op0);
1391     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1392       return TLO.CombineTo(Op, Op1);
1393     // If all of the demanded bits in the inputs are known zeros, return zero.
1394     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1395       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1396     // If the RHS is a constant, see if we can simplify it.
1397     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1398                                TLO))
1399       return true;
1400     // If the operation can be done in a smaller type, do so.
1401     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1402       return true;
1403 
1404     Known &= Known2;
1405     break;
1406   }
1407   case ISD::OR: {
1408     SDValue Op0 = Op.getOperand(0);
1409     SDValue Op1 = Op.getOperand(1);
1410 
1411     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1412                              Depth + 1))
1413       return true;
1414     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1415     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1416                              Known2, TLO, Depth + 1))
1417       return true;
1418     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1419 
1420     // Attempt to avoid multi-use ops if we don't need anything from them.
1421     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1422       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1423           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1424       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1425           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1426       if (DemandedOp0 || DemandedOp1) {
1427         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1428         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1429         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1430         return TLO.CombineTo(Op, NewOp);
1431       }
1432     }
1433 
1434     // If all of the demanded bits are known zero on one side, return the other.
1435     // These bits cannot contribute to the result of the 'or'.
1436     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1437       return TLO.CombineTo(Op, Op0);
1438     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1439       return TLO.CombineTo(Op, Op1);
1440     // If the RHS is a constant, see if we can simplify it.
1441     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1442       return true;
1443     // If the operation can be done in a smaller type, do so.
1444     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1445       return true;
1446 
1447     Known |= Known2;
1448     break;
1449   }
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Simplify each operand against the demanded bits/elements. Afterwards
    // Known describes Op1 and Known2 describes Op0.
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // turn this into an *inclusive* or.
    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));

    // C is the (splat) constant RHS if one exists; used by the folds below and
    // by the ShrinkDemandedConstant guard after them.
    ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts);
    if (C) {
      // If one side is a constant, and all of the set bits in the constant are
      // also known set on the other side, turn this into an AND, as we know
      // the bits will be cleared.
      //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
        // We're flipping all demanded bits. Flip the undemanded bits too.
        SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
        return TLO.CombineTo(Op, New);
      }
    }

    // If we can't turn this into a 'not', try to shrink the constant.
    if (!C || !C->isAllOnes())
      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
        return true;

    // Propagate the combined known bits of the xor.
    Known ^= Known2;
    break;
  }
  case ISD::SELECT:
    // Simplify both result operands (operand 1 = true value, operand 2 =
    // false value); the condition (operand 0) contributes no result bits.
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SELECT_CC:
    // Simplify both result operands (operands 2 and 3); the compared values
    // (operands 0/1) and condition code (operand 4) contribute no result bits.
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return TLO.CombineTo(Op, Op0);

      // TODO: Should we check for other forms of sign-bit comparisons?
      // Examples: X <= -1, X >= 0
    }
    // For 0/1 boolean results, everything above bit 0 is known zero.
    if (getBooleanContents(Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
1586   case ISD::SHL: {
1587     SDValue Op0 = Op.getOperand(0);
1588     SDValue Op1 = Op.getOperand(1);
1589     EVT ShiftVT = Op1.getValueType();
1590 
1591     if (const APInt *SA =
1592             TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1593       unsigned ShAmt = SA->getZExtValue();
1594       if (ShAmt == 0)
1595         return TLO.CombineTo(Op, Op0);
1596 
1597       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1598       // single shift.  We can do this if the bottom bits (which are shifted
1599       // out) are never demanded.
1600       // TODO - support non-uniform vector amounts.
1601       if (Op0.getOpcode() == ISD::SRL) {
1602         if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1603           if (const APInt *SA2 =
1604                   TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1605             unsigned C1 = SA2->getZExtValue();
1606             unsigned Opc = ISD::SHL;
1607             int Diff = ShAmt - C1;
1608             if (Diff < 0) {
1609               Diff = -Diff;
1610               Opc = ISD::SRL;
1611             }
1612             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1613             return TLO.CombineTo(
1614                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1615           }
1616         }
1617       }
1618 
1619       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1620       // are not demanded. This will likely allow the anyext to be folded away.
1621       // TODO - support non-uniform vector amounts.
1622       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1623         SDValue InnerOp = Op0.getOperand(0);
1624         EVT InnerVT = InnerOp.getValueType();
1625         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1626         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1627             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1628           EVT ShTy = getShiftAmountTy(InnerVT, DL);
1629           if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1630             ShTy = InnerVT;
1631           SDValue NarrowShl =
1632               TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1633                               TLO.DAG.getConstant(ShAmt, dl, ShTy));
1634           return TLO.CombineTo(
1635               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1636         }
1637 
1638         // Repeat the SHL optimization above in cases where an extension
1639         // intervenes: (shl (anyext (shr x, c1)), c2) to
1640         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1641         // aren't demanded (as above) and that the shifted upper c1 bits of
1642         // x aren't demanded.
1643         // TODO - support non-uniform vector amounts.
1644         if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
1645             InnerOp.hasOneUse()) {
1646           if (const APInt *SA2 =
1647                   TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1648             unsigned InnerShAmt = SA2->getZExtValue();
1649             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1650                 DemandedBits.getActiveBits() <=
1651                     (InnerBits - InnerShAmt + ShAmt) &&
1652                 DemandedBits.countTrailingZeros() >= ShAmt) {
1653               SDValue NewSA =
1654                   TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1655               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1656                                                InnerOp.getOperand(0));
1657               return TLO.CombineTo(
1658                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1659             }
1660           }
1661         }
1662       }
1663 
1664       APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1665       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1666                                Depth + 1))
1667         return true;
1668       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1669       Known.Zero <<= ShAmt;
1670       Known.One <<= ShAmt;
1671       // low bits known zero.
1672       Known.Zero.setLowBits(ShAmt);
1673 
1674       // Attempt to avoid multi-use ops if we don't need anything from them.
1675       if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1676         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1677             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1678         if (DemandedOp0) {
1679           SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1680           return TLO.CombineTo(Op, NewOp);
1681         }
1682       }
1683 
1684       // Try shrinking the operation as long as the shift amount will still be
1685       // in range.
1686       if ((ShAmt < DemandedBits.getActiveBits()) &&
1687           ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1688         return true;
1689     }
1690 
1691     // If we are only demanding sign bits then we can use the shift source
1692     // directly.
1693     if (const APInt *MaxSA =
1694             TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1695       unsigned ShAmt = MaxSA->getZExtValue();
1696       unsigned NumSignBits =
1697           TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1698       unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1699       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1700         return TLO.CombineTo(Op, Op0);
1701     }
1702     break;
1703   }
  case ISD::SRL: {
    // Logical shift-right: demanded low bits come from the shifted-down high
    // bits of Op0, so simplify Op0 against DemandedBits << ShAmt.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // Try to match AVG patterns.
    if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
                                        DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, AVG);

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift.  We can do this if the top bits (which are shifted out)
      // are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SHL) {
        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SRL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SHL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);
      // High bits known zero.
      Known.Zero.setHighBits(ShAmt);
    }
    break;
  }
  case ISD::SRA: {
    // Arithmetic shift-right: like SRL, but the vacated high bits replicate
    // the sign bit, enabling several sign-bit-based simplifications.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
    if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
        NumHiDemandedBits)
      return TLO.CombineTo(Op, Op0);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable.  The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedBits.isOne())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));

    // Try to match AVG patterns.
    if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
                                        DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, AVG);

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      if (DemandedBits.countLeadingZeros() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          DemandedBits.countLeadingZeros() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
      }

      // If only one (power-of-2) bit is demanded, it must be the shifted-down
      // copy of the sign bit; extract it with a logical shift instead.
      int Log2 = DemandedBits.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
        SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    // Funnel shift: result bits come from both Op0 (high half) and Op1 (low
    // half), split at the (modulo-reduced) shift amount Op2.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    SDValue Op2 = Op.getOperand(2);
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);

      // For fshl, 0-shift returns the 1st arg.
      // For fshr, 0-shift returns the 2nd arg.
      if (Amt == 0) {
        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
                                 Known, TLO, Depth + 1))
          return true;
        break;
      }

      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;

      // Shift the operands' known bits into result position and merge.
      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.One |= Known2.One;
      Known.Zero |= Known2.Zero;
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
                               Known2, TLO, Depth + 1))
        return true;
    }
    break;
  }
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    bool IsROTL = (Op.getOpcode() == ISD::ROTL);

    // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
    if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, Op0);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);
      unsigned RevAmt = BitWidth - Amt;

      // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
      // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
      APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;

      // rot*(x, 0) --> x
      if (Amt == 0)
        return TLO.CombineTo(Op, Op0);

      // See if we don't demand either half of the rotated bits.
      // If no bits below the rotate point are demanded, this is just a SHL.
      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
          DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
        Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
      }
      // If no bits above the rotate point are demanded, this is just a SRL.
      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
          DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
        Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
      }
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  case ISD::UMIN: {
    // Check if one arg is always less than (or equal) to the other arg.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    Known = KnownBits::umin(Known0, Known1);
    // If the comparison is decidable from known bits, the min is redundant.
    if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
      return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
    if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
      return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
    break;
  }
  case ISD::UMAX: {
    // Check if one arg is always greater than (or equal) to the other arg.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    Known = KnownBits::umax(Known0, Known1);
    // If the comparison is decidable from known bits, the max is redundant.
    if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
      return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
    if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
      return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
    break;
  }
  case ISD::BITREVERSE: {
    // Demanded bits of the source are the demanded bits of the result,
    // mirrored; known bits mirror back likewise.
    SDValue Src = Op.getOperand(0);
    APInt DemandedSrcBits = DemandedBits.reverseBits();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.reverseBits();
    Known.Zero = Known2.Zero.reverseBits();
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = Op.getOperand(0);

    // If the only bits demanded come from one byte of the bswap result,
    // just shift the input byte into position to eliminate the bswap.
    unsigned NLZ = DemandedBits.countLeadingZeros();
    unsigned NTZ = DemandedBits.countTrailingZeros();

    // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
    // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
    // have 14 leading zeros, round to 8.
    NLZ = alignDown(NLZ, 8);
    NTZ = alignDown(NTZ, 8);
    // If we need exactly one byte, we can do this transformation.
    if (BitWidth - NLZ - NTZ == 8) {
      // Replace this with either a left or right shift to get the byte into
      // the right place.
      unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
      if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
        EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
        unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
        SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
        SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // Otherwise, demanded/known bits map through a byte swap.
    APInt DemandedSrcBits = DemandedBits.byteSwap();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.byteSwap();
    Known.Zero = Known2.Zero.byteSwap();
    break;
  }
  case ISD::CTPOP: {
    // If only 1 bit is demanded, replace with PARITY as long as we're before
    // op legalization.
    // FIXME: Limit to scalars for now.
    if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
                                               Op.getOperand(0)));

    // Otherwise just report what is known about the popcount result.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    SDValue Op0 = Op.getOperand(0);
    // ExVT is the narrow type being sign-extended from, in the low ExVTBits.
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
    if (DemandedBits.isSignMask()) {
      unsigned MinSignedBits =
          TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
      bool AlreadySignExtended = ExVTBits >= MinSignedBits;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be getShiftAmountTy
        // for scalar types after legalization.
        SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
                                               getShiftAmountTy(VT, DL));
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
      }
    }

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (DemandedBits.getActiveBits() <= ExVTBits)
      return TLO.CombineTo(Op, Op0);

    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits.setBit(ExVTBits - 1);

    if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (Known.Zero[ExVTBits - 1])
      return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));

    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
      Known.One.setBitsFrom(ExVTBits);
      Known.Zero &= Mask;
    } else { // Input sign bit unknown
      Known.Zero &= Mask;
      Known.One &= Mask;
    }
    break;
  }
  case ISD::BUILD_PAIR: {
    // Operand 0 supplies the low half, operand 1 the high half of the result.
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    // Split the demanded bits into the two half-width masks.
    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    // Recombine the halves' known bits into the full-width result.
    Known.Zero = KnownLo.Zero.zext(BitWidth) |
                 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);

    Known.One = KnownLo.One.zext(BitWidth) |
                KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
    break;
  }
  case ISD::ZERO_EXTEND:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsLE && IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    // Narrow the demanded bits/elts to the source width and recurse; the
    // extended high bits are known zero.
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zext(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.zext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::SIGN_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsLE && IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zext(InElts);

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InDemandedBits.setBit(InBits - 1);

    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");

    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative()) {
      unsigned Opc =
          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
2183   case ISD::ANY_EXTEND:
2184   case ISD::ANY_EXTEND_VECTOR_INREG: {
2185     SDValue Src = Op.getOperand(0);
2186     EVT SrcVT = Src.getValueType();
2187     unsigned InBits = SrcVT.getScalarSizeInBits();
2188     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2189     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2190 
2191     // If we only need the bottom element then we can just bitcast.
2192     // TODO: Handle ANY_EXTEND?
2193     if (IsLE && IsVecInReg && DemandedElts == 1 &&
2194         VT.getSizeInBits() == SrcVT.getSizeInBits())
2195       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2196 
2197     APInt InDemandedBits = DemandedBits.trunc(InBits);
2198     APInt InDemandedElts = DemandedElts.zext(InElts);
2199     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2200                              Depth + 1))
2201       return true;
2202     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2203     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2204     Known = Known.anyext(BitWidth);
2205 
2206     // Attempt to avoid multi-use ops if we don't need anything from them.
2207     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2208             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2209       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2210     break;
2211   }
2212   case ISD::TRUNCATE: {
2213     SDValue Src = Op.getOperand(0);
2214 
2215     // Simplify the input, using demanded bit information, and compute the known
2216     // zero/one bits live out.
2217     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2218     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2219     if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2220                              Depth + 1))
2221       return true;
2222     Known = Known.trunc(BitWidth);
2223 
2224     // Attempt to avoid multi-use ops if we don't need anything from them.
2225     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2226             Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2227       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2228 
2229     // If the input is only used by this truncate, see if we can shrink it based
2230     // on the known demanded bits.
2231     if (Src.getNode()->hasOneUse()) {
2232       switch (Src.getOpcode()) {
2233       default:
2234         break;
2235       case ISD::SRL:
2236         // Shrink SRL by a constant if none of the high bits shifted in are
2237         // demanded.
2238         if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2239           // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2240           // undesirable.
2241           break;
2242 
2243         const APInt *ShAmtC =
2244             TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2245         if (!ShAmtC || ShAmtC->uge(BitWidth))
2246           break;
2247         uint64_t ShVal = ShAmtC->getZExtValue();
2248 
2249         APInt HighBits =
2250             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2251         HighBits.lshrInPlace(ShVal);
2252         HighBits = HighBits.trunc(BitWidth);
2253 
2254         if (!(HighBits & DemandedBits)) {
2255           // None of the shifted in bits are needed.  Add a truncate of the
2256           // shift input, then shift it.
2257           SDValue NewShAmt = TLO.DAG.getConstant(
2258               ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2259           SDValue NewTrunc =
2260               TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2261           return TLO.CombineTo(
2262               Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2263         }
2264         break;
2265       }
2266     }
2267 
2268     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2269     break;
2270   }
2271   case ISD::AssertZext: {
2272     // AssertZext demands all of the high bits, plus any of the low bits
2273     // demanded by its users.
2274     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2275     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2276     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2277                              TLO, Depth + 1))
2278       return true;
2279     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2280 
2281     Known.Zero |= ~InMask;
2282     break;
2283   }
2284   case ISD::EXTRACT_VECTOR_ELT: {
2285     SDValue Src = Op.getOperand(0);
2286     SDValue Idx = Op.getOperand(1);
2287     ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2288     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2289 
2290     if (SrcEltCnt.isScalable())
2291       return false;
2292 
2293     // Demand the bits from every vector element without a constant index.
2294     unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2295     APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2296     if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2297       if (CIdx->getAPIntValue().ult(NumSrcElts))
2298         DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2299 
2300     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2301     // anything about the extended bits.
2302     APInt DemandedSrcBits = DemandedBits;
2303     if (BitWidth > EltBitWidth)
2304       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2305 
2306     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2307                              Depth + 1))
2308       return true;
2309 
2310     // Attempt to avoid multi-use ops if we don't need anything from them.
2311     if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2312       if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2313               Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2314         SDValue NewOp =
2315             TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2316         return TLO.CombineTo(Op, NewOp);
2317       }
2318     }
2319 
2320     Known = Known2;
2321     if (BitWidth > EltBitWidth)
2322       Known = Known.anyext(BitWidth);
2323     break;
2324   }
2325   case ISD::BITCAST: {
2326     SDValue Src = Op.getOperand(0);
2327     EVT SrcVT = Src.getValueType();
2328     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2329 
2330     // If this is an FP->Int bitcast and if the sign bit is the only
2331     // thing demanded, turn this into a FGETSIGN.
2332     if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2333         DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2334         SrcVT.isFloatingPoint()) {
2335       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2336       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2337       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2338           SrcVT != MVT::f128) {
2339         // Cannot eliminate/lower SHL for f128 yet.
2340         EVT Ty = OpVTLegal ? VT : MVT::i32;
2341         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2342         // place.  We expect the SHL to be eliminated by other optimizations.
2343         SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2344         unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2345         if (!OpVTLegal && OpVTSizeInBits > 32)
2346           Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2347         unsigned ShVal = Op.getValueSizeInBits() - 1;
2348         SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2349         return TLO.CombineTo(Op,
2350                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2351       }
2352     }
2353 
2354     // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2355     // Demand the elt/bit if any of the original elts/bits are demanded.
2356     if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2357       unsigned Scale = BitWidth / NumSrcEltBits;
2358       unsigned NumSrcElts = SrcVT.getVectorNumElements();
2359       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2360       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2361       for (unsigned i = 0; i != Scale; ++i) {
2362         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2363         unsigned BitOffset = EltOffset * NumSrcEltBits;
2364         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2365         if (!Sub.isZero()) {
2366           DemandedSrcBits |= Sub;
2367           for (unsigned j = 0; j != NumElts; ++j)
2368             if (DemandedElts[j])
2369               DemandedSrcElts.setBit((j * Scale) + i);
2370         }
2371       }
2372 
2373       APInt KnownSrcUndef, KnownSrcZero;
2374       if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2375                                      KnownSrcZero, TLO, Depth + 1))
2376         return true;
2377 
2378       KnownBits KnownSrcBits;
2379       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2380                                KnownSrcBits, TLO, Depth + 1))
2381         return true;
2382     } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2383       // TODO - bigendian once we have test coverage.
2384       unsigned Scale = NumSrcEltBits / BitWidth;
2385       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2386       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2387       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2388       for (unsigned i = 0; i != NumElts; ++i)
2389         if (DemandedElts[i]) {
2390           unsigned Offset = (i % Scale) * BitWidth;
2391           DemandedSrcBits.insertBits(DemandedBits, Offset);
2392           DemandedSrcElts.setBit(i / Scale);
2393         }
2394 
2395       if (SrcVT.isVector()) {
2396         APInt KnownSrcUndef, KnownSrcZero;
2397         if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2398                                        KnownSrcZero, TLO, Depth + 1))
2399           return true;
2400       }
2401 
2402       KnownBits KnownSrcBits;
2403       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2404                                KnownSrcBits, TLO, Depth + 1))
2405         return true;
2406     }
2407 
2408     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
2409     // recursive call where Known may be useful to the caller.
2410     if (Depth > 0) {
2411       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2412       return false;
2413     }
2414     break;
2415   }
2416   case ISD::MUL:
2417     if (DemandedBits.isPowerOf2()) {
2418       // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2419       // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2420       // odd (has LSB set), then the left-shifted low bit of X is the answer.
2421       unsigned CTZ = DemandedBits.countTrailingZeros();
2422       ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2423       if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
2424         EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2425         SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
2426         SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2427         return TLO.CombineTo(Op, Shl);
2428       }
2429     }
2430     // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2431     // X * X is odd iff X is odd.
2432     // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2433     if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2434       SDValue One = TLO.DAG.getConstant(1, dl, VT);
2435       SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2436       return TLO.CombineTo(Op, And1);
2437     }
2438     LLVM_FALLTHROUGH;
2439   case ISD::ADD:
2440   case ISD::SUB: {
2441     // Add, Sub, and Mul don't demand any bits in positions beyond that
2442     // of the highest bit demanded of them.
2443     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2444     SDNodeFlags Flags = Op.getNode()->getFlags();
2445     unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
2446     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2447     if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
2448                              Depth + 1) ||
2449         SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
2450                              Depth + 1) ||
2451         // See if the operation should be performed at a smaller bit width.
2452         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2453       if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2454         // Disable the nsw and nuw flags. We can no longer guarantee that we
2455         // won't wrap after simplification.
2456         Flags.setNoSignedWrap(false);
2457         Flags.setNoUnsignedWrap(false);
2458         SDValue NewOp =
2459             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2460         return TLO.CombineTo(Op, NewOp);
2461       }
2462       return true;
2463     }
2464 
2465     // Attempt to avoid multi-use ops if we don't need anything from them.
2466     if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2467       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2468           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2469       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2470           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2471       if (DemandedOp0 || DemandedOp1) {
2472         Flags.setNoSignedWrap(false);
2473         Flags.setNoUnsignedWrap(false);
2474         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2475         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2476         SDValue NewOp =
2477             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2478         return TLO.CombineTo(Op, NewOp);
2479       }
2480     }
2481 
2482     // If we have a constant operand, we may be able to turn it into -1 if we
2483     // do not demand the high bits. This can make the constant smaller to
2484     // encode, allow more general folding, or match specialized instruction
2485     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2486     // is probably not useful (and could be detrimental).
2487     ConstantSDNode *C = isConstOrConstSplat(Op1);
2488     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2489     if (C && !C->isAllOnes() && !C->isOne() &&
2490         (C->getAPIntValue() | HighMask).isAllOnes()) {
2491       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2492       // Disable the nsw and nuw flags. We can no longer guarantee that we
2493       // won't wrap after simplification.
2494       Flags.setNoSignedWrap(false);
2495       Flags.setNoUnsignedWrap(false);
2496       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2497       return TLO.CombineTo(Op, NewOp);
2498     }
2499 
2500     // Match a multiply with a disguised negated-power-of-2 and convert to a
2501     // an equivalent shift-left amount.
2502     // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2503     auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2504       if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2505         return 0;
2506 
2507       // Don't touch opaque constants. Also, ignore zero and power-of-2
2508       // multiplies. Those will get folded later.
2509       ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2510       if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2511           !MulC->getAPIntValue().isPowerOf2()) {
2512         APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2513         if (UnmaskedC.isNegatedPowerOf2())
2514           return (-UnmaskedC).logBase2();
2515       }
2516       return 0;
2517     };
2518 
2519     auto foldMul = [&](SDValue X, SDValue Y, unsigned ShlAmt) {
2520       EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2521       SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
2522       SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2523       SDValue Sub = TLO.DAG.getNode(ISD::SUB, dl, VT, Y, Shl);
2524       return TLO.CombineTo(Op, Sub);
2525     };
2526 
2527     if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2528       if (Op.getOpcode() == ISD::ADD) {
2529         // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2530         if (unsigned ShAmt = getShiftLeftAmt(Op0))
2531           return foldMul(Op0.getOperand(0), Op1, ShAmt);
2532         // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2533         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2534           return foldMul(Op1.getOperand(0), Op0, ShAmt);
2535         // TODO:
2536         // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2537       }
2538     }
2539 
2540     LLVM_FALLTHROUGH;
2541   }
2542   default:
2543     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2544       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2545                                             Known, TLO, Depth))
2546         return true;
2547       break;
2548     }
2549 
2550     // Just use computeKnownBits to compute output bits.
2551     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2552     break;
2553   }
2554 
2555   // If we know the value of all of the demanded bits, return this as a
2556   // constant.
2557   if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2558     // Avoid folding to a constant if any OpaqueConstant is involved.
2559     const SDNode *N = Op.getNode();
2560     for (SDNode *Op :
2561          llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2562       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2563         if (C->isOpaque())
2564           return false;
2565     }
2566     if (VT.isInteger())
2567       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2568     if (VT.isFloatingPoint())
2569       return TLO.CombineTo(
2570           Op,
2571           TLO.DAG.getConstantFP(
2572               APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2573   }
2574 
2575   return false;
2576 }
2577 
2578 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2579                                                 const APInt &DemandedElts,
2580                                                 DAGCombinerInfo &DCI) const {
2581   SelectionDAG &DAG = DCI.DAG;
2582   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2583                         !DCI.isBeforeLegalizeOps());
2584 
2585   APInt KnownUndef, KnownZero;
2586   bool Simplified =
2587       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2588   if (Simplified) {
2589     DCI.AddToWorklist(Op.getNode());
2590     DCI.CommitTargetLoweringOpt(TLO);
2591   }
2592 
2593   return Simplified;
2594 }
2595 
2596 /// Given a vector binary operation and known undefined elements for each input
2597 /// operand, compute whether each element of the output is undefined.
2598 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2599                                          const APInt &UndefOp0,
2600                                          const APInt &UndefOp1) {
2601   EVT VT = BO.getValueType();
2602   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2603          "Vector binop only");
2604 
2605   EVT EltVT = VT.getVectorElementType();
2606   unsigned NumElts = VT.getVectorNumElements();
2607   assert(UndefOp0.getBitWidth() == NumElts &&
2608          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2609 
2610   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2611                                    const APInt &UndefVals) {
2612     if (UndefVals[Index])
2613       return DAG.getUNDEF(EltVT);
2614 
2615     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2616       // Try hard to make sure that the getNode() call is not creating temporary
2617       // nodes. Ignore opaque integers because they do not constant fold.
2618       SDValue Elt = BV->getOperand(Index);
2619       auto *C = dyn_cast<ConstantSDNode>(Elt);
2620       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2621         return Elt;
2622     }
2623 
2624     return SDValue();
2625   };
2626 
2627   APInt KnownUndef = APInt::getZero(NumElts);
2628   for (unsigned i = 0; i != NumElts; ++i) {
2629     // If both inputs for this element are either constant or undef and match
2630     // the element type, compute the constant/undef result for this element of
2631     // the vector.
2632     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2633     // not handle FP constants. The code within getNode() should be refactored
2634     // to avoid the danger of creating a bogus temporary node here.
2635     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2636     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2637     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2638       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2639         KnownUndef.setBit(i);
2640   }
2641   return KnownUndef;
2642 }
2643 
2644 bool TargetLowering::SimplifyDemandedVectorElts(
2645     SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2646     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2647     bool AssumeSingleUse) const {
2648   EVT VT = Op.getValueType();
2649   unsigned Opcode = Op.getOpcode();
2650   APInt DemandedElts = OriginalDemandedElts;
2651   unsigned NumElts = DemandedElts.getBitWidth();
2652   assert(VT.isVector() && "Expected vector op");
2653 
2654   KnownUndef = KnownZero = APInt::getZero(NumElts);
2655 
2656   const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
2657   if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
2658     return false;
2659 
2660   // TODO: For now we assume we know nothing about scalable vectors.
2661   if (VT.isScalableVector())
2662     return false;
2663 
2664   assert(VT.getVectorNumElements() == NumElts &&
2665          "Mask size mismatches value type element count!");
2666 
2667   // Undef operand.
2668   if (Op.isUndef()) {
2669     KnownUndef.setAllBits();
2670     return false;
2671   }
2672 
2673   // If Op has other users, assume that all elements are needed.
2674   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
2675     DemandedElts.setAllBits();
2676 
2677   // Not demanding any elements from Op.
2678   if (DemandedElts == 0) {
2679     KnownUndef.setAllBits();
2680     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2681   }
2682 
2683   // Limit search depth.
2684   if (Depth >= SelectionDAG::MaxRecursionDepth)
2685     return false;
2686 
2687   SDLoc DL(Op);
2688   unsigned EltSizeInBits = VT.getScalarSizeInBits();
2689   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
2690 
2691   // Helper for demanding the specified elements and all the bits of both binary
2692   // operands.
2693   auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
2694     SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
2695                                                            TLO.DAG, Depth + 1);
2696     SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
2697                                                            TLO.DAG, Depth + 1);
2698     if (NewOp0 || NewOp1) {
2699       SDValue NewOp = TLO.DAG.getNode(
2700           Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
2701       return TLO.CombineTo(Op, NewOp);
2702     }
2703     return false;
2704   };
2705 
2706   switch (Opcode) {
2707   case ISD::SCALAR_TO_VECTOR: {
2708     if (!DemandedElts[0]) {
2709       KnownUndef.setAllBits();
2710       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2711     }
2712     SDValue ScalarSrc = Op.getOperand(0);
2713     if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
2714       SDValue Src = ScalarSrc.getOperand(0);
2715       SDValue Idx = ScalarSrc.getOperand(1);
2716       EVT SrcVT = Src.getValueType();
2717 
2718       ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
2719 
2720       if (SrcEltCnt.isScalable())
2721         return false;
2722 
2723       unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2724       if (isNullConstant(Idx)) {
2725         APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
2726         APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
2727         APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
2728         if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2729                                        TLO, Depth + 1))
2730           return true;
2731       }
2732     }
2733     KnownUndef.setHighBits(NumElts - 1);
2734     break;
2735   }
2736   case ISD::BITCAST: {
2737     SDValue Src = Op.getOperand(0);
2738     EVT SrcVT = Src.getValueType();
2739 
2740     // We only handle vectors here.
2741     // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2742     if (!SrcVT.isVector())
2743       break;
2744 
2745     // Fast handling of 'identity' bitcasts.
2746     unsigned NumSrcElts = SrcVT.getVectorNumElements();
2747     if (NumSrcElts == NumElts)
2748       return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2749                                         KnownZero, TLO, Depth + 1);
2750 
2751     APInt SrcDemandedElts, SrcZero, SrcUndef;
2752 
2753     // Bitcast from 'large element' src vector to 'small element' vector, we
2754     // must demand a source element if any DemandedElt maps to it.
2755     if ((NumElts % NumSrcElts) == 0) {
2756       unsigned Scale = NumElts / NumSrcElts;
2757       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2758       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2759                                      TLO, Depth + 1))
2760         return true;
2761 
2762       // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2763       // of the large element.
2764       // TODO - bigendian once we have test coverage.
2765       if (IsLE) {
2766         unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2767         APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
2768         for (unsigned i = 0; i != NumElts; ++i)
2769           if (DemandedElts[i]) {
2770             unsigned Ofs = (i % Scale) * EltSizeInBits;
2771             SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2772           }
2773 
2774         KnownBits Known;
2775         if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
2776                                  TLO, Depth + 1))
2777           return true;
2778 
2779         // The bitcast has split each wide element into a number of
2780         // narrow subelements. We have just computed the Known bits
2781         // for wide elements. See if element splitting results in
2782         // some subelements being zero. Only for demanded elements!
2783         for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
2784           if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
2785                    .isAllOnes())
2786             continue;
2787           for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
2788             unsigned Elt = Scale * SrcElt + SubElt;
2789             if (DemandedElts[Elt])
2790               KnownZero.setBit(Elt);
2791           }
2792         }
2793       }
2794 
2795       // If the src element is zero/undef then all the output elements will be -
2796       // only demanded elements are guaranteed to be correct.
2797       for (unsigned i = 0; i != NumSrcElts; ++i) {
2798         if (SrcDemandedElts[i]) {
2799           if (SrcZero[i])
2800             KnownZero.setBits(i * Scale, (i + 1) * Scale);
2801           if (SrcUndef[i])
2802             KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2803         }
2804       }
2805     }
2806 
2807     // Bitcast from 'small element' src vector to 'large element' vector, we
2808     // demand all smaller source elements covered by the larger demanded element
2809     // of this vector.
2810     if ((NumSrcElts % NumElts) == 0) {
2811       unsigned Scale = NumSrcElts / NumElts;
2812       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2813       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2814                                      TLO, Depth + 1))
2815         return true;
2816 
2817       // If all the src elements covering an output element are zero/undef, then
2818       // the output element will be as well, assuming it was demanded.
2819       for (unsigned i = 0; i != NumElts; ++i) {
2820         if (DemandedElts[i]) {
2821           if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
2822             KnownZero.setBit(i);
2823           if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
2824             KnownUndef.setBit(i);
2825         }
2826       }
2827     }
2828     break;
2829   }
2830   case ISD::BUILD_VECTOR: {
2831     // Check all elements and simplify any unused elements with UNDEF.
2832     if (!DemandedElts.isAllOnes()) {
2833       // Don't simplify BROADCASTS.
2834       if (llvm::any_of(Op->op_values(),
2835                        [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2836         SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2837         bool Updated = false;
2838         for (unsigned i = 0; i != NumElts; ++i) {
2839           if (!DemandedElts[i] && !Ops[i].isUndef()) {
2840             Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2841             KnownUndef.setBit(i);
2842             Updated = true;
2843           }
2844         }
2845         if (Updated)
2846           return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
2847       }
2848     }
2849     for (unsigned i = 0; i != NumElts; ++i) {
2850       SDValue SrcOp = Op.getOperand(i);
2851       if (SrcOp.isUndef()) {
2852         KnownUndef.setBit(i);
2853       } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
2854                  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
2855         KnownZero.setBit(i);
2856       }
2857     }
2858     break;
2859   }
2860   case ISD::CONCAT_VECTORS: {
2861     EVT SubVT = Op.getOperand(0).getValueType();
2862     unsigned NumSubVecs = Op.getNumOperands();
2863     unsigned NumSubElts = SubVT.getVectorNumElements();
2864     for (unsigned i = 0; i != NumSubVecs; ++i) {
2865       SDValue SubOp = Op.getOperand(i);
2866       APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
2867       APInt SubUndef, SubZero;
2868       if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
2869                                      Depth + 1))
2870         return true;
2871       KnownUndef.insertBits(SubUndef, i * NumSubElts);
2872       KnownZero.insertBits(SubZero, i * NumSubElts);
2873     }
2874     break;
2875   }
2876   case ISD::INSERT_SUBVECTOR: {
2877     // Demand any elements from the subvector and the remainder from the src its
2878     // inserted into.
2879     SDValue Src = Op.getOperand(0);
2880     SDValue Sub = Op.getOperand(1);
2881     uint64_t Idx = Op.getConstantOperandVal(2);
2882     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
2883     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
2884     APInt DemandedSrcElts = DemandedElts;
2885     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
2886 
2887     APInt SubUndef, SubZero;
2888     if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
2889                                    Depth + 1))
2890       return true;
2891 
2892     // If none of the src operand elements are demanded, replace it with undef.
2893     if (!DemandedSrcElts && !Src.isUndef())
2894       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
2895                                                TLO.DAG.getUNDEF(VT), Sub,
2896                                                Op.getOperand(2)));
2897 
2898     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
2899                                    TLO, Depth + 1))
2900       return true;
2901     KnownUndef.insertBits(SubUndef, Idx);
2902     KnownZero.insertBits(SubZero, Idx);
2903 
2904     // Attempt to avoid multi-use ops if we don't need anything from them.
2905     if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
2906       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
2907           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
2908       SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
2909           Sub, DemandedSubElts, TLO.DAG, Depth + 1);
2910       if (NewSrc || NewSub) {
2911         NewSrc = NewSrc ? NewSrc : Src;
2912         NewSub = NewSub ? NewSub : Sub;
2913         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
2914                                         NewSub, Op.getOperand(2));
2915         return TLO.CombineTo(Op, NewOp);
2916       }
2917     }
2918     break;
2919   }
2920   case ISD::EXTRACT_SUBVECTOR: {
2921     // Offset the demanded elts by the subvector index.
2922     SDValue Src = Op.getOperand(0);
2923     if (Src.getValueType().isScalableVector())
2924       break;
2925     uint64_t Idx = Op.getConstantOperandVal(1);
2926     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2927     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
2928 
2929     APInt SrcUndef, SrcZero;
2930     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2931                                    Depth + 1))
2932       return true;
2933     KnownUndef = SrcUndef.extractBits(NumElts, Idx);
2934     KnownZero = SrcZero.extractBits(NumElts, Idx);
2935 
2936     // Attempt to avoid multi-use ops if we don't need anything from them.
2937     if (!DemandedElts.isAllOnes()) {
2938       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
2939           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
2940       if (NewSrc) {
2941         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
2942                                         Op.getOperand(1));
2943         return TLO.CombineTo(Op, NewOp);
2944       }
2945     }
2946     break;
2947   }
2948   case ISD::INSERT_VECTOR_ELT: {
2949     SDValue Vec = Op.getOperand(0);
2950     SDValue Scl = Op.getOperand(1);
2951     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2952 
2953     // For a legal, constant insertion index, if we don't need this insertion
2954     // then strip it, else remove it from the demanded elts.
2955     if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
2956       unsigned Idx = CIdx->getZExtValue();
2957       if (!DemandedElts[Idx])
2958         return TLO.CombineTo(Op, Vec);
2959 
2960       APInt DemandedVecElts(DemandedElts);
2961       DemandedVecElts.clearBit(Idx);
2962       if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
2963                                      KnownZero, TLO, Depth + 1))
2964         return true;
2965 
2966       KnownUndef.setBitVal(Idx, Scl.isUndef());
2967 
2968       KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
2969       break;
2970     }
2971 
2972     APInt VecUndef, VecZero;
2973     if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
2974                                    Depth + 1))
2975       return true;
2976     // Without knowing the insertion index we can't set KnownUndef/KnownZero.
2977     break;
2978   }
2979   case ISD::VSELECT: {
2980     // Try to transform the select condition based on the current demanded
2981     // elements.
2982     // TODO: If a condition element is undef, we can choose from one arm of the
2983     //       select (and if one arm is undef, then we can propagate that to the
2984     //       result).
2985     // TODO - add support for constant vselect masks (see IR version of this).
2986     APInt UnusedUndef, UnusedZero;
2987     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
2988                                    UnusedZero, TLO, Depth + 1))
2989       return true;
2990 
2991     // See if we can simplify either vselect operand.
2992     APInt DemandedLHS(DemandedElts);
2993     APInt DemandedRHS(DemandedElts);
2994     APInt UndefLHS, ZeroLHS;
2995     APInt UndefRHS, ZeroRHS;
2996     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
2997                                    ZeroLHS, TLO, Depth + 1))
2998       return true;
2999     if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
3000                                    ZeroRHS, TLO, Depth + 1))
3001       return true;
3002 
3003     KnownUndef = UndefLHS & UndefRHS;
3004     KnownZero = ZeroLHS & ZeroRHS;
3005     break;
3006   }
3007   case ISD::VECTOR_SHUFFLE: {
3008     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3009 
3010     // Collect demanded elements from shuffle operands..
3011     APInt DemandedLHS(NumElts, 0);
3012     APInt DemandedRHS(NumElts, 0);
3013     for (unsigned i = 0; i != NumElts; ++i) {
3014       int M = ShuffleMask[i];
3015       if (M < 0 || !DemandedElts[i])
3016         continue;
3017       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3018       if (M < (int)NumElts)
3019         DemandedLHS.setBit(M);
3020       else
3021         DemandedRHS.setBit(M - NumElts);
3022     }
3023 
3024     // See if we can simplify either shuffle operand.
3025     APInt UndefLHS, ZeroLHS;
3026     APInt UndefRHS, ZeroRHS;
3027     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
3028                                    ZeroLHS, TLO, Depth + 1))
3029       return true;
3030     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
3031                                    ZeroRHS, TLO, Depth + 1))
3032       return true;
3033 
3034     // Simplify mask using undef elements from LHS/RHS.
3035     bool Updated = false;
3036     bool IdentityLHS = true, IdentityRHS = true;
3037     SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
3038     for (unsigned i = 0; i != NumElts; ++i) {
3039       int &M = NewMask[i];
3040       if (M < 0)
3041         continue;
3042       if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3043           (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3044         Updated = true;
3045         M = -1;
3046       }
3047       IdentityLHS &= (M < 0) || (M == (int)i);
3048       IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3049     }
3050 
3051     // Update legal shuffle masks based on demanded elements if it won't reduce
3052     // to Identity which can cause premature removal of the shuffle mask.
3053     if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3054       SDValue LegalShuffle =
3055           buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
3056                                   NewMask, TLO.DAG);
3057       if (LegalShuffle)
3058         return TLO.CombineTo(Op, LegalShuffle);
3059     }
3060 
3061     // Propagate undef/zero elements from LHS/RHS.
3062     for (unsigned i = 0; i != NumElts; ++i) {
3063       int M = ShuffleMask[i];
3064       if (M < 0) {
3065         KnownUndef.setBit(i);
3066       } else if (M < (int)NumElts) {
3067         if (UndefLHS[M])
3068           KnownUndef.setBit(i);
3069         if (ZeroLHS[M])
3070           KnownZero.setBit(i);
3071       } else {
3072         if (UndefRHS[M - NumElts])
3073           KnownUndef.setBit(i);
3074         if (ZeroRHS[M - NumElts])
3075           KnownZero.setBit(i);
3076       }
3077     }
3078     break;
3079   }
3080   case ISD::ANY_EXTEND_VECTOR_INREG:
3081   case ISD::SIGN_EXTEND_VECTOR_INREG:
3082   case ISD::ZERO_EXTEND_VECTOR_INREG: {
3083     APInt SrcUndef, SrcZero;
3084     SDValue Src = Op.getOperand(0);
3085     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3086     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3087     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3088                                    Depth + 1))
3089       return true;
3090     KnownZero = SrcZero.zextOrTrunc(NumElts);
3091     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3092 
3093     if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3094         Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3095         DemandedSrcElts == 1) {
3096       // aext - if we just need the bottom element then we can bitcast.
3097       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3098     }
3099 
3100     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3101       // zext(undef) upper bits are guaranteed to be zero.
3102       if (DemandedElts.isSubsetOf(KnownUndef))
3103         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3104       KnownUndef.clearAllBits();
3105 
3106       // zext - if we just need the bottom element then we can mask:
3107       // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3108       if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3109           Op->isOnlyUserOf(Src.getNode()) &&
3110           Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3111         SDLoc DL(Op);
3112         EVT SrcVT = Src.getValueType();
3113         EVT SrcSVT = SrcVT.getScalarType();
3114         SmallVector<SDValue> MaskElts;
3115         MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3116         MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3117         SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3118         if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3119                 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3120           Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3121           return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3122         }
3123       }
3124     }
3125     break;
3126   }
3127 
3128   // TODO: There are more binop opcodes that could be handled here - MIN,
3129   // MAX, saturated math, etc.
3130   case ISD::ADD: {
3131     SDValue Op0 = Op.getOperand(0);
3132     SDValue Op1 = Op.getOperand(1);
3133     if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3134       APInt UndefLHS, ZeroLHS;
3135       if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3136                                      Depth + 1, /*AssumeSingleUse*/ true))
3137         return true;
3138     }
3139     LLVM_FALLTHROUGH;
3140   }
3141   case ISD::OR:
3142   case ISD::XOR:
3143   case ISD::SUB:
3144   case ISD::FADD:
3145   case ISD::FSUB:
3146   case ISD::FMUL:
3147   case ISD::FDIV:
3148   case ISD::FREM: {
3149     SDValue Op0 = Op.getOperand(0);
3150     SDValue Op1 = Op.getOperand(1);
3151 
3152     APInt UndefRHS, ZeroRHS;
3153     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3154                                    Depth + 1))
3155       return true;
3156     APInt UndefLHS, ZeroLHS;
3157     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3158                                    Depth + 1))
3159       return true;
3160 
3161     KnownZero = ZeroLHS & ZeroRHS;
3162     KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3163 
3164     // Attempt to avoid multi-use ops if we don't need anything from them.
3165     // TODO - use KnownUndef to relax the demandedelts?
3166     if (!DemandedElts.isAllOnes())
3167       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3168         return true;
3169     break;
3170   }
3171   case ISD::SHL:
3172   case ISD::SRL:
3173   case ISD::SRA:
3174   case ISD::ROTL:
3175   case ISD::ROTR: {
3176     SDValue Op0 = Op.getOperand(0);
3177     SDValue Op1 = Op.getOperand(1);
3178 
3179     APInt UndefRHS, ZeroRHS;
3180     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3181                                    Depth + 1))
3182       return true;
3183     APInt UndefLHS, ZeroLHS;
3184     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3185                                    Depth + 1))
3186       return true;
3187 
3188     KnownZero = ZeroLHS;
3189     KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3190 
3191     // Attempt to avoid multi-use ops if we don't need anything from them.
3192     // TODO - use KnownUndef to relax the demandedelts?
3193     if (!DemandedElts.isAllOnes())
3194       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3195         return true;
3196     break;
3197   }
3198   case ISD::MUL:
3199   case ISD::AND: {
3200     SDValue Op0 = Op.getOperand(0);
3201     SDValue Op1 = Op.getOperand(1);
3202 
3203     APInt SrcUndef, SrcZero;
3204     if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3205                                    Depth + 1))
3206       return true;
3207     if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
3208                                    TLO, Depth + 1))
3209       return true;
3210 
3211     // If either side has a zero element, then the result element is zero, even
3212     // if the other is an UNDEF.
3213     // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3214     // and then handle 'and' nodes with the rest of the binop opcodes.
3215     KnownZero |= SrcZero;
3216     KnownUndef &= SrcUndef;
3217     KnownUndef &= ~KnownZero;
3218 
3219     // Attempt to avoid multi-use ops if we don't need anything from them.
3220     // TODO - use KnownUndef to relax the demandedelts?
3221     if (!DemandedElts.isAllOnes())
3222       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3223         return true;
3224     break;
3225   }
3226   case ISD::TRUNCATE:
3227   case ISD::SIGN_EXTEND:
3228   case ISD::ZERO_EXTEND:
3229     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3230                                    KnownZero, TLO, Depth + 1))
3231       return true;
3232 
3233     if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3234       // zext(undef) upper bits are guaranteed to be zero.
3235       if (DemandedElts.isSubsetOf(KnownUndef))
3236         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3237       KnownUndef.clearAllBits();
3238     }
3239     break;
3240   default: {
3241     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3242       if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3243                                                   KnownZero, TLO, Depth))
3244         return true;
3245     } else {
3246       KnownBits Known;
3247       APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3248       if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3249                                TLO, Depth, AssumeSingleUse))
3250         return true;
3251     }
3252     break;
3253   }
3254   }
3255   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3256 
3257   // Constant fold all undef cases.
3258   // TODO: Handle zero cases as well.
3259   if (DemandedElts.isSubsetOf(KnownUndef))
3260     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3261 
3262   return false;
3263 }
3264 
3265 /// Determine which of the bits specified in Mask are known to be either zero or
3266 /// one and return them in the Known.
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                   KnownBits &Known,
                                                   const APInt &DemandedElts,
                                                   const SelectionDAG &DAG,
                                                   unsigned Depth) const {
  // This hook must only be reached for target-specific nodes (or the
  // target-dispatched intrinsic wrappers); generic opcodes are handled by the
  // generic known-bits analysis.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");
  // Conservative default: report that no bits are known. Targets override
  // this to expose knowledge about their custom nodes.
  Known.resetAll();
}
3280 
/// GlobalISel counterpart of computeKnownBitsForTargetNode: the default
/// implementation assumes nothing is known about register \p R.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  // Conservative default: no known bits. Targets override to analyze their
  // own generic instructions.
  Known.resetAll();
}
3287 
3288 void TargetLowering::computeKnownBitsForFrameIndex(
3289   const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3290   // The low bits are known zero if the pointer is aligned.
3291   Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3292 }
3293 
/// Default alignment analysis hook for GlobalISel: claim only the trivial
/// one-byte alignment for register \p R. Targets override to do better.
Align TargetLowering::computeKnownAlignForTargetInstr(
  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  unsigned Depth) const {
  // Conservative default: no alignment beyond 1 byte is known.
  return Align(1);
}
3299 
3300 /// This method can be implemented by targets that want to expose additional
3301 /// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                         const APInt &,
                                                         const SelectionDAG &,
                                                         unsigned Depth) const {
  // Only target-specific nodes (or intrinsic wrappers) should reach this
  // hook; generic nodes are handled by SelectionDAG::ComputeNumSignBits.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use ComputeNumSignBits if you don't know whether Op"
         " is a target node!");
  // Conservative default: every value has at least one (copy of the) sign
  // bit, so claim exactly that.
  return 1;
}
3314 
/// GlobalISel counterpart of ComputeNumSignBitsForTargetNode: the default
/// implementation reports the minimum possible (a single sign bit).
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  // Conservative default: only the sign bit itself.
  return 1;
}
3320 
bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
    TargetLoweringOpt &TLO, unsigned Depth) const {
  // Only target-specific nodes (or intrinsic wrappers) should reach this
  // hook; generic opcodes have dedicated handling.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedVectorElts if you don't know whether Op"
         " is a target node!");
  // Default: no target-specific simplification was performed.
  return false;
}
3332 
bool TargetLowering::SimplifyDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
  // Only target-specific nodes (or intrinsic wrappers) should reach this
  // hook; generic opcodes have dedicated handling.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedBits if you don't know whether Op"
         " is a target node!");
  // Even without simplifying, populate Known via the known-bits hook so
  // callers still receive whatever bit knowledge the target can provide.
  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
  // Default: no target-specific simplification was performed.
  return false;
}
3345 
SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  // Only target-specific nodes (or intrinsic wrappers) should reach this
  // hook; generic opcodes have dedicated handling.
  assert(
      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
      "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
      " is a target node!");
  // Default: no simplified replacement value is available.
  return SDValue();
}
3358 
3359 SDValue
3360 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3361                                         SDValue N1, MutableArrayRef<int> Mask,
3362                                         SelectionDAG &DAG) const {
3363   bool LegalMask = isShuffleMaskLegal(Mask, VT);
3364   if (!LegalMask) {
3365     std::swap(N0, N1);
3366     ShuffleVectorSDNode::commuteMask(Mask);
3367     LegalMask = isShuffleMaskLegal(Mask, VT);
3368   }
3369 
3370   if (!LegalMask)
3371     return SDValue();
3372 
3373   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3374 }
3375 
/// Default hook: the target does not recognize any loads as loads of
/// constants, so return null. Targets override this to expose, e.g.,
/// constant-pool loads to the DAG combiner.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
3379 
bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, unsigned Depth) const {
  // Only target-specific nodes (or intrinsic wrappers) should reach this
  // hook; generic opcodes have dedicated handling.
  assert(
      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
      "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
      " is a target node!");
  // Conservative default: cannot rule out undef/poison.
  return false;
}
3392 
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  bool SNaN,
                                                  unsigned Depth) const {
  // Only target-specific nodes (or intrinsic wrappers) should reach this
  // hook; generic opcodes have dedicated handling.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isKnownNeverNaN if you don't know whether Op"
         " is a target node!");
  // Conservative default: cannot prove the value is never NaN.
  return false;
}
3405 
bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
                                               const APInt &DemandedElts,
                                               APInt &UndefElts,
                                               unsigned Depth) const {
  // Only target-specific nodes (or intrinsic wrappers) should reach this
  // hook; generic opcodes have dedicated handling.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isSplatValue if you don't know whether Op"
         " is a target node!");
  // Conservative default: cannot prove the value is a splat.
  return false;
}
3418 
3419 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3420 // work with truncating build vectors and vectors with elements of less than
3421 // 8 bits.
3422 bool TargetLowering::isConstTrueVal(SDValue N) const {
3423   if (!N)
3424     return false;
3425 
3426   unsigned EltWidth;
3427   APInt CVal;
3428   if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3429                                                /*AllowTruncation=*/true)) {
3430     CVal = CN->getAPIntValue();
3431     EltWidth = N.getValueType().getScalarSizeInBits();
3432   } else
3433     return false;
3434 
3435   // If this is a truncating splat, truncate the splat value.
3436   // Otherwise, we may fail to match the expected values below.
3437   if (EltWidth < CVal.getBitWidth())
3438     CVal = CVal.trunc(EltWidth);
3439 
3440   switch (getBooleanContents(N.getValueType())) {
3441   case UndefinedBooleanContent:
3442     return CVal[0];
3443   case ZeroOrOneBooleanContent:
3444     return CVal.isOne();
3445   case ZeroOrNegativeOneBooleanContent:
3446     return CVal.isAllOnes();
3447   }
3448 
3449   llvm_unreachable("Invalid boolean contents");
3450 }
3451 
3452 bool TargetLowering::isConstFalseVal(SDValue N) const {
3453   if (!N)
3454     return false;
3455 
3456   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3457   if (!CN) {
3458     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3459     if (!BV)
3460       return false;
3461 
3462     // Only interested in constant splats, we don't care about undef
3463     // elements in identifying boolean constants and getConstantSplatNode
3464     // returns NULL if all ops are undef;
3465     CN = BV->getConstantSplatNode();
3466     if (!CN)
3467       return false;
3468   }
3469 
3470   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3471     return !CN->getAPIntValue()[0];
3472 
3473   return CN->isZero();
3474 }
3475 
3476 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3477                                        bool SExt) const {
3478   if (VT == MVT::i1)
3479     return N->isOne();
3480 
3481   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3482   switch (Cnt) {
3483   case TargetLowering::ZeroOrOneBooleanContent:
3484     // An extended value of 1 is always true, unless its original type is i1,
3485     // in which case it will be sign extended to -1.
3486     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3487   case TargetLowering::UndefinedBooleanContent:
3488   case TargetLowering::ZeroOrNegativeOneBooleanContent:
3489     return N->isAllOnes() && SExt;
3490   }
3491   llvm_unreachable("Unexpected enumeration.");
3492 }
3493 
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
/// Returns a replacement SetCC value, or an empty SDValue if no fold applies.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Canonicalize: if exactly one operand is an AND, put it on the LHS (N0).
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  // Only integer equality comparisons against an AND are handled.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(N1) &&
      (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
    if (DAG.MaskedValueIsZero(N0, UpperBits))
      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  // On a match, Y is the AND operand that equals the RHS and X is the other.
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    auto *YConst = dyn_cast<ConstantSDNode>(Y);
    if (YConst && YConst->isZero())
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
3565 
3566 /// There are multiple IR patterns that could be checking whether certain
3567 /// truncation of a signed number would be lossy or not. The pattern which is
3568 /// best at IR level, may not lower optimally. Thus, we want to unfold it.
3569 /// We are looking for the following pattern: (KeptBits is a constant)
3570 ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
3571 /// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
3572 /// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
3573 /// We will unfold it into the natural trunc+sext pattern:
3574 ///   ((%x << C) a>> C) dstcond %x
3575 /// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned range-check predicate onto the eq/ne predicate used by
  // the unfolded trunc+sext pattern. SETULE/SETUGT need the constant bumped
  // by one so they become the canonical SETULT/SETUGE forms.
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  // The pattern only encodes a truncation check when the setcc constant is
  // exactly twice the add constant, i.e. the set bits are adjacent.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into:  ((%x << C) a>> C) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);

  return T2;
}
3663 
3664 // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
3665 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
3666     EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
3667     DAGCombinerInfo &DCI, const SDLoc &DL) const {
3668   assert(isConstOrConstSplat(N1C) &&
3669          isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
3670          "Should be a comparison with 0.");
3671   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3672          "Valid only for [in]equality comparisons.");
3673 
3674   unsigned NewShiftOpcode;
3675   SDValue X, C, Y;
3676 
3677   SelectionDAG &DAG = DCI.DAG;
3678   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3679 
3680   // Look for '(C l>>/<< Y)'.
3681   auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
3682     // The shift should be one-use.
3683     if (!V.hasOneUse())
3684       return false;
3685     unsigned OldShiftOpcode = V.getOpcode();
3686     switch (OldShiftOpcode) {
3687     case ISD::SHL:
3688       NewShiftOpcode = ISD::SRL;
3689       break;
3690     case ISD::SRL:
3691       NewShiftOpcode = ISD::SHL;
3692       break;
3693     default:
3694       return false; // must be a logical shift.
3695     }
3696     // We should be shifting a constant.
3697     // FIXME: best to use isConstantOrConstantVector().
3698     C = V.getOperand(0);
3699     ConstantSDNode *CC =
3700         isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3701     if (!CC)
3702       return false;
3703     Y = V.getOperand(1);
3704 
3705     ConstantSDNode *XC =
3706         isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3707     return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
3708         X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
3709   };
3710 
3711   // LHS of comparison should be an one-use 'and'.
3712   if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3713     return SDValue();
3714 
3715   X = N0.getOperand(0);
3716   SDValue Mask = N0.getOperand(1);
3717 
3718   // 'and' is commutative!
3719   if (!Match(Mask)) {
3720     std::swap(X, Mask);
3721     if (!Match(Mask))
3722       return SDValue();
3723   }
3724 
3725   EVT VT = X.getValueType();
3726 
3727   // Produce:
3728   // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3729   SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3730   SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3731   SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3732   return T2;
3733 }
3734 
3735 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3736 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3737 /// handle the commuted versions of these patterns.
3738 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3739                                            ISD::CondCode Cond, const SDLoc &DL,
3740                                            DAGCombinerInfo &DCI) const {
3741   unsigned BOpcode = N0.getOpcode();
3742   assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3743          "Unexpected binop");
3744   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3745 
3746   // (X + Y) == X --> Y == 0
3747   // (X - Y) == X --> Y == 0
3748   // (X ^ Y) == X --> Y == 0
3749   SelectionDAG &DAG = DCI.DAG;
3750   EVT OpVT = N0.getValueType();
3751   SDValue X = N0.getOperand(0);
3752   SDValue Y = N0.getOperand(1);
3753   if (X == N1)
3754     return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3755 
3756   if (Y != N1)
3757     return SDValue();
3758 
3759   // (X + Y) == Y --> X == 0
3760   // (X ^ Y) == Y --> X == 0
3761   if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3762     return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3763 
3764   // The shift would not be valid if the operands are boolean (i1).
3765   if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3766     return SDValue();
3767 
3768   // (X - Y) == Y --> X == Y << 1
3769   EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3770                                  !DCI.isBeforeLegalize());
3771   SDValue One = DAG.getConstant(1, DL, ShiftVT);
3772   SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3773   if (!DCI.isCalledByLegalizer())
3774     DCI.AddToWorklist(YShl1.getNode());
3775   return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3776 }
3777 
/// Fold a setcc of a ctpop (population count) against a constant into a
/// cheaper bit-manipulation sequence, e.g. "(ctpop x) u< 2" -> "(x & x-1) == 0".
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  // The trunc is value-preserving here when the narrowed type is still wide
  // enough to represent any possible popcount of the pre-trunc value.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // If this is a vector CTPOP, keep the CTPOP if it is legal.
  // TODO: Should we check if CTPOP is legal(or custom) for scalars?
  if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
    return SDValue();

  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  // Each 'x & (x-1)' step clears the lowest set bit, so after N steps the
  // result is zero iff x had at most N bits set.
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Bound the number of and/add steps by the target's cost model.
    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    // (ctpop x) u< 0 is always false; that constant fold happens elsewhere.
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    // Number of 'clear the lowest set bit' iterations needed:
    // u< C needs C-1 steps, u> C needs C steps.
    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      // x + (-1) == x - 1; and'ing it with x drops the lowest set bit.
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // If ctpop is not supported, expand a power-of-2 comparison based on it.
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // For scalars, keep CTPOP if it is legal or custom.
    if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
      return SDValue();
    // This is based on X86's custom lowering for CTPOP which produces more
    // instructions than the expansion here.

    // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
    // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
    SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
    SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
    SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
    unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
    return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
  }

  return SDValue();
}
3845 
3846 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
3847                                    ISD::CondCode Cond, const SDLoc &dl,
3848                                    SelectionDAG &DAG) {
3849   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
3850     return SDValue();
3851 
3852   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
3853   if (!C1 || !(C1->isZero() || C1->isAllOnes()))
3854     return SDValue();
3855 
3856   auto getRotateSource = [](SDValue X) {
3857     if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
3858       return X.getOperand(0);
3859     return SDValue();
3860   };
3861 
3862   // Peek through a rotated value compared against 0 or -1:
3863   // (rot X, Y) == 0/-1 --> X == 0/-1
3864   // (rot X, Y) != 0/-1 --> X != 0/-1
3865   if (SDValue R = getRotateSource(N0))
3866     return DAG.getSetCC(dl, VT, R, N1, Cond);
3867 
3868   // Peek through an 'or' of a rotated value compared against 0:
3869   // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
3870   // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
3871   //
3872   // TODO: Add the 'and' with -1 sibling.
3873   // TODO: Recurse through a series of 'or' ops to find the rotate.
3874   EVT OpVT = N0.getValueType();
3875   if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
3876     if (SDValue R = getRotateSource(N0.getOperand(0))) {
3877       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
3878       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
3879     }
3880     if (SDValue R = getRotateSource(N0.getOperand(1))) {
3881       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
3882       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
3883     }
3884   }
3885 
3886   return SDValue();
3887 }
3888 
3889 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
3890                                         ISD::CondCode Cond, const SDLoc &dl,
3891                                         SelectionDAG &DAG) {
3892   // If we are testing for all-bits-clear, we might be able to do that with
3893   // less shifting since bit-order does not matter.
3894   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
3895     return SDValue();
3896 
3897   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
3898   if (!C1 || !C1->isZero())
3899     return SDValue();
3900 
3901   if (!N0.hasOneUse() ||
3902       (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
3903     return SDValue();
3904 
3905   unsigned BitWidth = N0.getScalarValueSizeInBits();
3906   auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
3907   if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
3908     return SDValue();
3909 
3910   // Canonicalize fshr as fshl to reduce pattern-matching.
3911   unsigned ShAmt = ShAmtC->getZExtValue();
3912   if (N0.getOpcode() == ISD::FSHR)
3913     ShAmt = BitWidth - ShAmt;
3914 
3915   // Match an 'or' with a specific operand 'Other' in either commuted variant.
3916   SDValue X, Y;
3917   auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
3918     if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
3919       return false;
3920     if (Or.getOperand(0) == Other) {
3921       X = Or.getOperand(0);
3922       Y = Or.getOperand(1);
3923       return true;
3924     }
3925     if (Or.getOperand(1) == Other) {
3926       X = Or.getOperand(1);
3927       Y = Or.getOperand(0);
3928       return true;
3929     }
3930     return false;
3931   };
3932 
3933   EVT OpVT = N0.getValueType();
3934   EVT ShAmtVT = N0.getOperand(2).getValueType();
3935   SDValue F0 = N0.getOperand(0);
3936   SDValue F1 = N0.getOperand(1);
3937   if (matchOr(F0, F1)) {
3938     // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
3939     SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
3940     SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
3941     SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
3942     return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
3943   }
3944   if (matchOr(F1, F0)) {
3945     // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
3946     SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
3947     SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
3948     SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
3949     return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
3950   }
3951 
3952   return SDValue();
3953 }
3954 
3955 /// Try to simplify a setcc built with the specified operands and cc. If it is
3956 /// unable to simplify it, return a null SDValue.
3957 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
3958                                       ISD::CondCode Cond, bool foldBooleans,
3959                                       DAGCombinerInfo &DCI,
3960                                       const SDLoc &dl) const {
3961   SelectionDAG &DAG = DCI.DAG;
3962   const DataLayout &Layout = DAG.getDataLayout();
3963   EVT OpVT = N0.getValueType();
3964 
3965   // Constant fold or commute setcc.
3966   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
3967     return Fold;
3968 
3969   bool N0ConstOrSplat =
3970       isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
3971   bool N1ConstOrSplat =
3972       isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
3973 
3974   // Ensure that the constant occurs on the RHS and fold constant comparisons.
3975   // TODO: Handle non-splat vector constants. All undef causes trouble.
3976   // FIXME: We can't yet fold constant scalable vector splats, so avoid an
3977   // infinite loop here when we encounter one.
3978   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
3979   if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
3980       (DCI.isBeforeLegalizeOps() ||
3981        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
3982     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3983 
3984   // If we have a subtract with the same 2 non-constant operands as this setcc
3985   // -- but in reverse order -- then try to commute the operands of this setcc
3986   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
3987   // instruction on some targets.
3988   if (!N0ConstOrSplat && !N1ConstOrSplat &&
3989       (DCI.isBeforeLegalizeOps() ||
3990        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
3991       DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
3992       !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
3993     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3994 
3995   if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
3996     return V;
3997 
3998   if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
3999     return V;
4000 
4001   if (auto *N1C = isConstOrConstSplat(N1)) {
4002     const APInt &C1 = N1C->getAPIntValue();
4003 
4004     // Optimize some CTPOP cases.
4005     if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4006       return V;
4007 
4008     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4009     // equality comparison, then we're just comparing whether X itself is
4010     // zero.
4011     if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4012         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4013         isPowerOf2_32(N0.getScalarValueSizeInBits())) {
4014       if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4015         if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4016             ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4017           if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4018             // (srl (ctlz x), 5) == 0  -> X != 0
4019             // (srl (ctlz x), 5) != 1  -> X != 0
4020             Cond = ISD::SETNE;
4021           } else {
4022             // (srl (ctlz x), 5) != 0  -> X == 0
4023             // (srl (ctlz x), 5) == 1  -> X == 0
4024             Cond = ISD::SETEQ;
4025           }
4026           SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4027           return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4028                               Cond);
4029         }
4030       }
4031     }
4032   }
4033 
4034   // FIXME: Support vectors.
4035   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4036     const APInt &C1 = N1C->getAPIntValue();
4037 
4038     // (zext x) == C --> x == (trunc C)
4039     // (sext x) == C --> x == (trunc C)
4040     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4041         DCI.isBeforeLegalize() && N0->hasOneUse()) {
4042       unsigned MinBits = N0.getValueSizeInBits();
4043       SDValue PreExt;
4044       bool Signed = false;
4045       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4046         // ZExt
4047         MinBits = N0->getOperand(0).getValueSizeInBits();
4048         PreExt = N0->getOperand(0);
4049       } else if (N0->getOpcode() == ISD::AND) {
4050         // DAGCombine turns costly ZExts into ANDs
4051         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4052           if ((C->getAPIntValue()+1).isPowerOf2()) {
4053             MinBits = C->getAPIntValue().countTrailingOnes();
4054             PreExt = N0->getOperand(0);
4055           }
4056       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4057         // SExt
4058         MinBits = N0->getOperand(0).getValueSizeInBits();
4059         PreExt = N0->getOperand(0);
4060         Signed = true;
4061       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4062         // ZEXTLOAD / SEXTLOAD
4063         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4064           MinBits = LN0->getMemoryVT().getSizeInBits();
4065           PreExt = N0;
4066         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4067           Signed = true;
4068           MinBits = LN0->getMemoryVT().getSizeInBits();
4069           PreExt = N0;
4070         }
4071       }
4072 
4073       // Figure out how many bits we need to preserve this constant.
4074       unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits();
4075 
4076       // Make sure we're not losing bits from the constant.
4077       if (MinBits > 0 &&
4078           MinBits < C1.getBitWidth() &&
4079           MinBits >= ReqdBits) {
4080         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4081         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4082           // Will get folded away.
4083           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4084           if (MinBits == 1 && C1 == 1)
4085             // Invert the condition.
4086             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4087                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4088           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4089           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4090         }
4091 
4092         // If truncating the setcc operands is not desirable, we can still
4093         // simplify the expression in some cases:
4094         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4095         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4096         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4097         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4098         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4099         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4100         SDValue TopSetCC = N0->getOperand(0);
4101         unsigned N0Opc = N0->getOpcode();
4102         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4103         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4104             TopSetCC.getOpcode() == ISD::SETCC &&
4105             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4106             (isConstFalseVal(N1) ||
4107              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4108 
4109           bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4110                          (!N1C->isZero() && Cond == ISD::SETNE);
4111 
4112           if (!Inverse)
4113             return TopSetCC;
4114 
4115           ISD::CondCode InvCond = ISD::getSetCCInverse(
4116               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4117               TopSetCC.getOperand(0).getValueType());
4118           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4119                                       TopSetCC.getOperand(1),
4120                                       InvCond);
4121         }
4122       }
4123     }
4124 
4125     // If the LHS is '(and load, const)', the RHS is 0, the test is for
4126     // equality or unsigned, and all 1 bits of the const are in the same
4127     // partial word, see if we can shorten the load.
4128     if (DCI.isBeforeLegalize() &&
4129         !ISD::isSignedIntSetCC(Cond) &&
4130         N0.getOpcode() == ISD::AND && C1 == 0 &&
4131         N0.getNode()->hasOneUse() &&
4132         isa<LoadSDNode>(N0.getOperand(0)) &&
4133         N0.getOperand(0).getNode()->hasOneUse() &&
4134         isa<ConstantSDNode>(N0.getOperand(1))) {
4135       LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4136       APInt bestMask;
4137       unsigned bestWidth = 0, bestOffset = 0;
4138       if (Lod->isSimple() && Lod->isUnindexed()) {
4139         unsigned origWidth = N0.getValueSizeInBits();
4140         unsigned maskWidth = origWidth;
4141         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4142         // 8 bits, but have to be careful...
4143         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4144           origWidth = Lod->getMemoryVT().getSizeInBits();
4145         const APInt &Mask = N0.getConstantOperandAPInt(1);
4146         for (unsigned width = origWidth / 2; width>=8; width /= 2) {
4147           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4148           for (unsigned offset=0; offset<origWidth/width; offset++) {
4149             if (Mask.isSubsetOf(newMask)) {
4150               if (Layout.isLittleEndian())
4151                 bestOffset = (uint64_t)offset * (width/8);
4152               else
4153                 bestOffset = (origWidth/width - offset - 1) * (width/8);
4154               bestMask = Mask.lshr(offset * (width/8) * 8);
4155               bestWidth = width;
4156               break;
4157             }
4158             newMask <<= width;
4159           }
4160         }
4161       }
4162       if (bestWidth) {
4163         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4164         if (newVT.isRound() &&
4165             shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
4166           SDValue Ptr = Lod->getBasePtr();
4167           if (bestOffset != 0)
4168             Ptr =
4169                 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
4170           SDValue NewLoad =
4171               DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4172                           Lod->getPointerInfo().getWithOffset(bestOffset),
4173                           Lod->getOriginalAlign());
4174           return DAG.getSetCC(dl, VT,
4175                               DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4176                                       DAG.getConstant(bestMask.trunc(bestWidth),
4177                                                       dl, newVT)),
4178                               DAG.getConstant(0LL, dl, newVT), Cond);
4179         }
4180       }
4181     }
4182 
4183     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4184     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4185       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4186 
4187       // If the comparison constant has bits in the upper part, the
4188       // zero-extended value could never match.
4189       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4190                                               C1.getBitWidth() - InSize))) {
4191         switch (Cond) {
4192         case ISD::SETUGT:
4193         case ISD::SETUGE:
4194         case ISD::SETEQ:
4195           return DAG.getConstant(0, dl, VT);
4196         case ISD::SETULT:
4197         case ISD::SETULE:
4198         case ISD::SETNE:
4199           return DAG.getConstant(1, dl, VT);
4200         case ISD::SETGT:
4201         case ISD::SETGE:
4202           // True if the sign bit of C1 is set.
4203           return DAG.getConstant(C1.isNegative(), dl, VT);
4204         case ISD::SETLT:
4205         case ISD::SETLE:
4206           // True if the sign bit of C1 isn't set.
4207           return DAG.getConstant(C1.isNonNegative(), dl, VT);
4208         default:
4209           break;
4210         }
4211       }
4212 
4213       // Otherwise, we can perform the comparison with the low bits.
4214       switch (Cond) {
4215       case ISD::SETEQ:
4216       case ISD::SETNE:
4217       case ISD::SETUGT:
4218       case ISD::SETUGE:
4219       case ISD::SETULT:
4220       case ISD::SETULE: {
4221         EVT newVT = N0.getOperand(0).getValueType();
4222         if (DCI.isBeforeLegalizeOps() ||
4223             (isOperationLegal(ISD::SETCC, newVT) &&
4224              isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4225           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4226           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4227 
4228           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4229                                           NewConst, Cond);
4230           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4231         }
4232         break;
4233       }
4234       default:
4235         break; // todo, be more careful with signed comparisons
4236       }
4237     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4238                (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4239                !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4240                                       OpVT)) {
4241       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4242       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4243       EVT ExtDstTy = N0.getValueType();
4244       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4245 
4246       // If the constant doesn't fit into the number of bits for the source of
4247       // the sign extension, it is impossible for both sides to be equal.
4248       if (C1.getMinSignedBits() > ExtSrcTyBits)
4249         return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4250 
4251       assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4252              ExtDstTy != ExtSrcTy && "Unexpected types!");
4253       APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4254       SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4255                                    DAG.getConstant(Imm, dl, ExtDstTy));
4256       if (!DCI.isCalledByLegalizer())
4257         DCI.AddToWorklist(ZextOp.getNode());
4258       // Otherwise, make this a use of a zext.
4259       return DAG.getSetCC(dl, VT, ZextOp,
4260                           DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4261     } else if ((N1C->isZero() || N1C->isOne()) &&
4262                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4263       // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
4264       if (N0.getOpcode() == ISD::SETCC &&
4265           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4266           (N0.getValueType() == MVT::i1 ||
4267            getBooleanContents(N0.getOperand(0).getValueType()) ==
4268                        ZeroOrOneBooleanContent)) {
4269         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4270         if (TrueWhenTrue)
4271           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4272         // Invert the condition.
4273         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4274         CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4275         if (DCI.isBeforeLegalizeOps() ||
4276             isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4277           return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4278       }
4279 
4280       if ((N0.getOpcode() == ISD::XOR ||
4281            (N0.getOpcode() == ISD::AND &&
4282             N0.getOperand(0).getOpcode() == ISD::XOR &&
4283             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4284           isOneConstant(N0.getOperand(1))) {
4285         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
4286         // can only do this if the top bits are known zero.
4287         unsigned BitWidth = N0.getValueSizeInBits();
4288         if (DAG.MaskedValueIsZero(N0,
4289                                   APInt::getHighBitsSet(BitWidth,
4290                                                         BitWidth-1))) {
4291           // Okay, get the un-inverted input value.
4292           SDValue Val;
4293           if (N0.getOpcode() == ISD::XOR) {
4294             Val = N0.getOperand(0);
4295           } else {
4296             assert(N0.getOpcode() == ISD::AND &&
4297                     N0.getOperand(0).getOpcode() == ISD::XOR);
4298             // ((X^1)&1)^1 -> X & 1
4299             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4300                               N0.getOperand(0).getOperand(0),
4301                               N0.getOperand(1));
4302           }
4303 
4304           return DAG.getSetCC(dl, VT, Val, N1,
4305                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4306         }
4307       } else if (N1C->isOne()) {
4308         SDValue Op0 = N0;
4309         if (Op0.getOpcode() == ISD::TRUNCATE)
4310           Op0 = Op0.getOperand(0);
4311 
4312         if ((Op0.getOpcode() == ISD::XOR) &&
4313             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4314             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4315           SDValue XorLHS = Op0.getOperand(0);
4316           SDValue XorRHS = Op0.getOperand(1);
4317           // Ensure that the input setccs return an i1 type or 0/1 value.
4318           if (Op0.getValueType() == MVT::i1 ||
4319               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4320                       ZeroOrOneBooleanContent &&
4321                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4322                         ZeroOrOneBooleanContent)) {
4323             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4324             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4325             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4326           }
4327         }
4328         if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4329           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4330           if (Op0.getValueType().bitsGT(VT))
4331             Op0 = DAG.getNode(ISD::AND, dl, VT,
4332                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4333                           DAG.getConstant(1, dl, VT));
4334           else if (Op0.getValueType().bitsLT(VT))
4335             Op0 = DAG.getNode(ISD::AND, dl, VT,
4336                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4337                         DAG.getConstant(1, dl, VT));
4338 
4339           return DAG.getSetCC(dl, VT, Op0,
4340                               DAG.getConstant(0, dl, Op0.getValueType()),
4341                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4342         }
4343         if (Op0.getOpcode() == ISD::AssertZext &&
4344             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4345           return DAG.getSetCC(dl, VT, Op0,
4346                               DAG.getConstant(0, dl, Op0.getValueType()),
4347                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4348       }
4349     }
4350 
4351     // Given:
4352     //   icmp eq/ne (urem %x, %y), 0
4353     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4354     //   icmp eq/ne %x, 0
4355     if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4356         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4357       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4358       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4359       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4360         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4361     }
4362 
4363     // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4364     //  and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4365     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4366         N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4367         N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4368         N1C && N1C->isAllOnes()) {
4369       return DAG.getSetCC(dl, VT, N0.getOperand(0),
4370                           DAG.getConstant(0, dl, OpVT),
4371                           Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4372     }
4373 
4374     if (SDValue V =
4375             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4376       return V;
4377   }
4378 
4379   // These simplifications apply to splat vectors as well.
4380   // TODO: Handle more splat vector cases.
4381   if (auto *N1C = isConstOrConstSplat(N1)) {
4382     const APInt &C1 = N1C->getAPIntValue();
4383 
4384     APInt MinVal, MaxVal;
4385     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4386     if (ISD::isSignedIntSetCC(Cond)) {
4387       MinVal = APInt::getSignedMinValue(OperandBitSize);
4388       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4389     } else {
4390       MinVal = APInt::getMinValue(OperandBitSize);
4391       MaxVal = APInt::getMaxValue(OperandBitSize);
4392     }
4393 
4394     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4395     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4396       // X >= MIN --> true
4397       if (C1 == MinVal)
4398         return DAG.getBoolConstant(true, dl, VT, OpVT);
4399 
4400       if (!VT.isVector()) { // TODO: Support this for vectors.
4401         // X >= C0 --> X > (C0 - 1)
4402         APInt C = C1 - 1;
4403         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4404         if ((DCI.isBeforeLegalizeOps() ||
4405              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4406             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4407                                   isLegalICmpImmediate(C.getSExtValue())))) {
4408           return DAG.getSetCC(dl, VT, N0,
4409                               DAG.getConstant(C, dl, N1.getValueType()),
4410                               NewCC);
4411         }
4412       }
4413     }
4414 
4415     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4416       // X <= MAX --> true
4417       if (C1 == MaxVal)
4418         return DAG.getBoolConstant(true, dl, VT, OpVT);
4419 
4420       // X <= C0 --> X < (C0 + 1)
4421       if (!VT.isVector()) { // TODO: Support this for vectors.
4422         APInt C = C1 + 1;
4423         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4424         if ((DCI.isBeforeLegalizeOps() ||
4425              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4426             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4427                                   isLegalICmpImmediate(C.getSExtValue())))) {
4428           return DAG.getSetCC(dl, VT, N0,
4429                               DAG.getConstant(C, dl, N1.getValueType()),
4430                               NewCC);
4431         }
4432       }
4433     }
4434 
4435     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4436       if (C1 == MinVal)
4437         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4438 
4439       // TODO: Support this for vectors after legalize ops.
4440       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4441         // Canonicalize setlt X, Max --> setne X, Max
4442         if (C1 == MaxVal)
4443           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4444 
4445         // If we have setult X, 1, turn it into seteq X, 0
4446         if (C1 == MinVal+1)
4447           return DAG.getSetCC(dl, VT, N0,
4448                               DAG.getConstant(MinVal, dl, N0.getValueType()),
4449                               ISD::SETEQ);
4450       }
4451     }
4452 
4453     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4454       if (C1 == MaxVal)
4455         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4456 
4457       // TODO: Support this for vectors after legalize ops.
4458       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4459         // Canonicalize setgt X, Min --> setne X, Min
4460         if (C1 == MinVal)
4461           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4462 
4463         // If we have setugt X, Max-1, turn it into seteq X, Max
4464         if (C1 == MaxVal-1)
4465           return DAG.getSetCC(dl, VT, N0,
4466                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
4467                               ISD::SETEQ);
4468       }
4469     }
4470 
4471     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
4472       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
4473       if (C1.isZero())
4474         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4475                 VT, N0, N1, Cond, DCI, dl))
4476           return CC;
4477 
4478       // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
4479       // For example, when high 32-bits of i64 X are known clear:
4480       // all bits clear: (X | (Y<<32)) ==  0 --> (X | Y) ==  0
4481       // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
4482       bool CmpZero = N1C->getAPIntValue().isZero();
4483       bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
4484       if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
4485         // Match or(lo,shl(hi,bw/2)) pattern.
4486         auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
4487           unsigned EltBits = V.getScalarValueSizeInBits();
4488           if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
4489             return false;
4490           SDValue LHS = V.getOperand(0);
4491           SDValue RHS = V.getOperand(1);
4492           APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
4493           // Unshifted element must have zero upperbits.
4494           if (RHS.getOpcode() == ISD::SHL &&
4495               isa<ConstantSDNode>(RHS.getOperand(1)) &&
4496               RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
4497               DAG.MaskedValueIsZero(LHS, HiBits)) {
4498             Lo = LHS;
4499             Hi = RHS.getOperand(0);
4500             return true;
4501           }
4502           if (LHS.getOpcode() == ISD::SHL &&
4503               isa<ConstantSDNode>(LHS.getOperand(1)) &&
4504               LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
4505               DAG.MaskedValueIsZero(RHS, HiBits)) {
4506             Lo = RHS;
4507             Hi = LHS.getOperand(0);
4508             return true;
4509           }
4510           return false;
4511         };
4512 
4513         auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
4514           unsigned EltBits = N0.getScalarValueSizeInBits();
4515           unsigned HalfBits = EltBits / 2;
4516           APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
4517           SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
4518           SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
4519           SDValue NewN0 =
4520               DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
4521           SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
4522           return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
4523         };
4524 
4525         SDValue Lo, Hi;
4526         if (IsConcat(N0, Lo, Hi))
4527           return MergeConcat(Lo, Hi);
4528 
4529         if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
4530           SDValue Lo0, Lo1, Hi0, Hi1;
4531           if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
4532               IsConcat(N0.getOperand(1), Lo1, Hi1)) {
4533             return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
4534                                DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
4535           }
4536         }
4537       }
4538     }
4539 
4540     // If we have "setcc X, C0", check to see if we can shrink the immediate
4541     // by changing cc.
4542     // TODO: Support this for vectors after legalize ops.
4543     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4544       // SETUGT X, SINTMAX  -> SETLT X, 0
4545       // SETUGE X, SINTMIN -> SETLT X, 0
4546       if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
4547           (Cond == ISD::SETUGE && C1.isMinSignedValue()))
4548         return DAG.getSetCC(dl, VT, N0,
4549                             DAG.getConstant(0, dl, N1.getValueType()),
4550                             ISD::SETLT);
4551 
4552       // SETULT X, SINTMIN  -> SETGT X, -1
4553       // SETULE X, SINTMAX  -> SETGT X, -1
4554       if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
4555           (Cond == ISD::SETULE && C1.isMaxSignedValue()))
4556         return DAG.getSetCC(dl, VT, N0,
4557                             DAG.getAllOnesConstant(dl, N1.getValueType()),
4558                             ISD::SETGT);
4559     }
4560   }
4561 
4562   // Back to non-vector simplifications.
4563   // TODO: Can we do these for vector splats?
4564   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4565     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4566     const APInt &C1 = N1C->getAPIntValue();
4567     EVT ShValTy = N0.getValueType();
4568 
4569     // Fold bit comparisons when we can. This will result in an
4570     // incorrect value when boolean false is negative one, unless
4571     // the bitsize is 1 in which case the false value is the same
4572     // in practice regardless of the representation.
4573     if ((VT.getSizeInBits() == 1 ||
4574          getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4575         (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4576         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
4577         N0.getOpcode() == ISD::AND) {
4578       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4579         EVT ShiftTy =
4580             getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
4581         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
4582           // Perform the xform if the AND RHS is a single bit.
4583           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
4584           if (AndRHS->getAPIntValue().isPowerOf2() &&
4585               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
4586             return DAG.getNode(ISD::TRUNCATE, dl, VT,
4587                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4588                                            DAG.getConstant(ShCt, dl, ShiftTy)));
4589           }
4590         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
4591           // (X & 8) == 8  -->  (X & 8) >> 3
4592           // Perform the xform if C1 is a single bit.
4593           unsigned ShCt = C1.logBase2();
4594           if (C1.isPowerOf2() &&
4595               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
4596             return DAG.getNode(ISD::TRUNCATE, dl, VT,
4597                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4598                                            DAG.getConstant(ShCt, dl, ShiftTy)));
4599           }
4600         }
4601       }
4602     }
4603 
4604     if (C1.getMinSignedBits() <= 64 &&
4605         !isLegalICmpImmediate(C1.getSExtValue())) {
4606       EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
4607       // (X & -256) == 256 -> (X >> 8) == 1
4608       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4609           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
4610         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4611           const APInt &AndRHSC = AndRHS->getAPIntValue();
4612           if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
4613             unsigned ShiftBits = AndRHSC.countTrailingZeros();
4614             if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
4615               SDValue Shift =
4616                 DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
4617                             DAG.getConstant(ShiftBits, dl, ShiftTy));
4618               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
4619               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
4620             }
4621           }
4622         }
4623       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
4624                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
4625         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
4626         // X <  0x100000000 -> (X >> 32) <  1
4627         // X >= 0x100000000 -> (X >> 32) >= 1
4628         // X <= 0x0ffffffff -> (X >> 32) <  1
4629         // X >  0x0ffffffff -> (X >> 32) >= 1
4630         unsigned ShiftBits;
4631         APInt NewC = C1;
4632         ISD::CondCode NewCond = Cond;
4633         if (AdjOne) {
4634           ShiftBits = C1.countTrailingOnes();
4635           NewC = NewC + 1;
4636           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4637         } else {
4638           ShiftBits = C1.countTrailingZeros();
4639         }
4640         NewC.lshrInPlace(ShiftBits);
4641         if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
4642             isLegalICmpImmediate(NewC.getSExtValue()) &&
4643             !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
4644           SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4645                                       DAG.getConstant(ShiftBits, dl, ShiftTy));
4646           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
4647           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
4648         }
4649       }
4650     }
4651   }
4652 
4653   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
4654     auto *CFP = cast<ConstantFPSDNode>(N1);
4655     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
4656 
4657     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
4658     // constant if knowing that the operand is non-nan is enough.  We prefer to
4659     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
4660     // materialize 0.0.
4661     if (Cond == ISD::SETO || Cond == ISD::SETUO)
4662       return DAG.getSetCC(dl, VT, N0, N0, Cond);
4663 
4664     // setcc (fneg x), C -> setcc swap(pred) x, -C
4665     if (N0.getOpcode() == ISD::FNEG) {
4666       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
4667       if (DCI.isBeforeLegalizeOps() ||
4668           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
4669         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
4670         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
4671       }
4672     }
4673 
4674     // If the condition is not legal, see if we can find an equivalent one
4675     // which is legal.
4676     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
4677       // If the comparison was an awkward floating-point == or != and one of
4678       // the comparison operands is infinity or negative infinity, convert the
4679       // condition to a less-awkward <= or >=.
4680       if (CFP->getValueAPF().isInfinity()) {
4681         bool IsNegInf = CFP->getValueAPF().isNegative();
4682         ISD::CondCode NewCond = ISD::SETCC_INVALID;
4683         switch (Cond) {
4684         case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
4685         case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
4686         case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
4687         case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
4688         default: break;
4689         }
4690         if (NewCond != ISD::SETCC_INVALID &&
4691             isCondCodeLegal(NewCond, N0.getSimpleValueType()))
4692           return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4693       }
4694     }
4695   }
4696 
4697   if (N0 == N1) {
4698     // The sext(setcc()) => setcc() optimization relies on the appropriate
4699     // constant being emitted.
4700     assert(!N0.getValueType().isInteger() &&
4701            "Integer types should be handled by FoldSetCC");
4702 
4703     bool EqTrue = ISD::isTrueWhenEqual(Cond);
4704     unsigned UOF = ISD::getUnorderedFlavor(Cond);
4705     if (UOF == 2) // FP operators that are undefined on NaNs.
4706       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4707     if (UOF == unsigned(EqTrue))
4708       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4709     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
4710     // if it is not already.
4711     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
4712     if (NewCond != Cond &&
4713         (DCI.isBeforeLegalizeOps() ||
4714                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
4715       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4716   }
4717 
4718   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4719       N0.getValueType().isInteger()) {
4720     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
4721         N0.getOpcode() == ISD::XOR) {
4722       // Simplify (X+Y) == (X+Z) -->  Y == Z
4723       if (N0.getOpcode() == N1.getOpcode()) {
4724         if (N0.getOperand(0) == N1.getOperand(0))
4725           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
4726         if (N0.getOperand(1) == N1.getOperand(1))
4727           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
4728         if (isCommutativeBinOp(N0.getOpcode())) {
4729           // If X op Y == Y op X, try other combinations.
4730           if (N0.getOperand(0) == N1.getOperand(1))
4731             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
4732                                 Cond);
4733           if (N0.getOperand(1) == N1.getOperand(0))
4734             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
4735                                 Cond);
4736         }
4737       }
4738 
4739       // If RHS is a legal immediate value for a compare instruction, we need
4740       // to be careful about increasing register pressure needlessly.
4741       bool LegalRHSImm = false;
4742 
4743       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
4744         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4745           // Turn (X+C1) == C2 --> X == C2-C1
4746           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
4747             return DAG.getSetCC(
4748                 dl, VT, N0.getOperand(0),
4749                 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
4750                                 dl, N0.getValueType()),
4751                 Cond);
4752 
4753           // Turn (X^C1) == C2 --> X == C1^C2
4754           if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
4755             return DAG.getSetCC(
4756                 dl, VT, N0.getOperand(0),
4757                 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
4758                                 dl, N0.getValueType()),
4759                 Cond);
4760         }
4761 
4762         // Turn (C1-X) == C2 --> X == C1-C2
4763         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
4764           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
4765             return DAG.getSetCC(
4766                 dl, VT, N0.getOperand(1),
4767                 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
4768                                 dl, N0.getValueType()),
4769                 Cond);
4770 
4771         // Could RHSC fold directly into a compare?
4772         if (RHSC->getValueType(0).getSizeInBits() <= 64)
4773           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
4774       }
4775 
4776       // (X+Y) == X --> Y == 0 and similar folds.
4777       // Don't do this if X is an immediate that can fold into a cmp
4778       // instruction and X+Y has other uses. It could be an induction variable
4779       // chain, and the transform would increase register pressure.
4780       if (!LegalRHSImm || N0.hasOneUse())
4781         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
4782           return V;
4783     }
4784 
4785     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
4786         N1.getOpcode() == ISD::XOR)
4787       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
4788         return V;
4789 
4790     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
4791       return V;
4792   }
4793 
4794   // Fold remainder of division by a constant.
4795   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
4796       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4797     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4798 
4799     // When division is cheap or optimizing for minimum size,
4800     // fall through to DIVREM creation by skipping this fold.
4801     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
4802       if (N0.getOpcode() == ISD::UREM) {
4803         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
4804           return Folded;
4805       } else if (N0.getOpcode() == ISD::SREM) {
4806         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
4807           return Folded;
4808       }
4809     }
4810   }
4811 
4812   // Fold away ALL boolean setcc's.
4813   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
4814     SDValue Temp;
4815     switch (Cond) {
4816     default: llvm_unreachable("Unknown integer setcc!");
4817     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
4818       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4819       N0 = DAG.getNOT(dl, Temp, OpVT);
4820       if (!DCI.isCalledByLegalizer())
4821         DCI.AddToWorklist(Temp.getNode());
4822       break;
4823     case ISD::SETNE:  // X != Y   -->  (X^Y)
4824       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4825       break;
4826     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
4827     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
4828       Temp = DAG.getNOT(dl, N0, OpVT);
4829       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
4830       if (!DCI.isCalledByLegalizer())
4831         DCI.AddToWorklist(Temp.getNode());
4832       break;
4833     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
4834     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
4835       Temp = DAG.getNOT(dl, N1, OpVT);
4836       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
4837       if (!DCI.isCalledByLegalizer())
4838         DCI.AddToWorklist(Temp.getNode());
4839       break;
4840     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
4841     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
4842       Temp = DAG.getNOT(dl, N0, OpVT);
4843       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
4844       if (!DCI.isCalledByLegalizer())
4845         DCI.AddToWorklist(Temp.getNode());
4846       break;
4847     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
4848     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
4849       Temp = DAG.getNOT(dl, N1, OpVT);
4850       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
4851       break;
4852     }
4853     if (VT.getScalarType() != MVT::i1) {
4854       if (!DCI.isCalledByLegalizer())
4855         DCI.AddToWorklist(N0.getNode());
4856       // FIXME: If running after legalize, we probably can't do this.
4857       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
4858       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
4859     }
4860     return N0;
4861   }
4862 
4863   // Could not fold it.
4864   return SDValue();
4865 }
4866 
4867 /// Returns true (and the GlobalValue and the offset) if the node is a
4868 /// GlobalAddress + offset.
4869 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
4870                                     int64_t &Offset) const {
4871 
4872   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
4873 
4874   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
4875     GA = GASD->getGlobal();
4876     Offset += GASD->getOffset();
4877     return true;
4878   }
4879 
4880   if (N->getOpcode() == ISD::ADD) {
4881     SDValue N1 = N->getOperand(0);
4882     SDValue N2 = N->getOperand(1);
4883     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
4884       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
4885         Offset += V->getSExtValue();
4886         return true;
4887       }
4888     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
4889       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
4890         Offset += V->getSExtValue();
4891         return true;
4892       }
4893     }
4894   }
4895 
4896   return false;
4897 }
4898 
4899 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
4900                                           DAGCombinerInfo &DCI) const {
4901   // Default implementation: no optimization.
4902   return SDValue();
4903 }
4904 
4905 //===----------------------------------------------------------------------===//
4906 //  Inline Assembler Implementation Methods
4907 //===----------------------------------------------------------------------===//
4908 
4909 TargetLowering::ConstraintType
4910 TargetLowering::getConstraintType(StringRef Constraint) const {
4911   unsigned S = Constraint.size();
4912 
4913   if (S == 1) {
4914     switch (Constraint[0]) {
4915     default: break;
4916     case 'r':
4917       return C_RegisterClass;
4918     case 'm': // memory
4919     case 'o': // offsetable
4920     case 'V': // not offsetable
4921       return C_Memory;
4922     case 'p': // Address.
4923       return C_Address;
4924     case 'n': // Simple Integer
4925     case 'E': // Floating Point Constant
4926     case 'F': // Floating Point Constant
4927       return C_Immediate;
4928     case 'i': // Simple Integer or Relocatable Constant
4929     case 's': // Relocatable Constant
4930     case 'X': // Allow ANY value.
4931     case 'I': // Target registers.
4932     case 'J':
4933     case 'K':
4934     case 'L':
4935     case 'M':
4936     case 'N':
4937     case 'O':
4938     case 'P':
4939     case '<':
4940     case '>':
4941       return C_Other;
4942     }
4943   }
4944 
4945   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
4946     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
4947       return C_Memory;
4948     return C_Register;
4949   }
4950   return C_Unknown;
4951 }
4952 
4953 /// Try to replace an X constraint, which matches anything, with another that
4954 /// has more specific requirements based on the type of the corresponding
4955 /// operand.
4956 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4957   if (ConstraintVT.isInteger())
4958     return "r";
4959   if (ConstraintVT.isFloatingPoint())
4960     return "f"; // works for many targets
4961   return nullptr;
4962 }
4963 
4964 SDValue TargetLowering::LowerAsmOutputForConstraint(
4965     SDValue &Chain, SDValue &Flag, const SDLoc &DL,
4966     const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
4967   return SDValue();
4968 }
4969 
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {
  // Only single-character constraints are handled generically; targets deal
  // with longer constraint strings in their overrides.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':    // Allows any operand
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant
    // C holds the constant picked up at each step of the walk; Offset
    // accumulates the constant adjustments found around the symbol/constant
    // root. Kept unsigned — wraparound yields the correct two's-complement
    // value when negative offsets are folded in.
    ConstantSDNode *C;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // A bare integer constant satisfies every letter except 's', which
      // demands something relocatable.
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        // i1 values are extended per the target's boolean contents rather
        // than being unconditionally sign extended.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      // Symbolic operands are valid for every letter except 'n' (simple
      // integers only).
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          // Fold the accumulated offset into the target global address node.
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      // Peel one add/sub with a constant operand and keep walking the other
      // operand toward the symbol/constant root.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return; // Neither side is a constant: pattern not matchable.
        // NOTE(review): for SUB the constant is taken from the LHS (C - X)
        // yet accumulated as X - C; confirm against upstream intent before
        // relying on this path.
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      // Any other opcode terminates the walk with nothing added to Ops.
      return;
    }
    break;
  }
  }
}
5047 
5048 std::pair<unsigned, const TargetRegisterClass *>
5049 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5050                                              StringRef Constraint,
5051                                              MVT VT) const {
5052   if (Constraint.empty() || Constraint[0] != '{')
5053     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5054   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5055 
5056   // Remove the braces from around the name.
5057   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5058 
5059   std::pair<unsigned, const TargetRegisterClass *> R =
5060       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5061 
5062   // Figure out which register class contains this reg.
5063   for (const TargetRegisterClass *RC : RI->regclasses()) {
5064     // If none of the value types for this register class are valid, we
5065     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5066     if (!isLegalRC(*RI, *RC))
5067       continue;
5068 
5069     for (const MCPhysReg &PR : *RC) {
5070       if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5071         std::pair<unsigned, const TargetRegisterClass *> S =
5072             std::make_pair(PR, RC);
5073 
5074         // If this register class has the requested value type, return it,
5075         // otherwise keep searching and return the first class found
5076         // if no other is found which explicitly has the requested type.
5077         if (RI->isTypeLegalForClass(*RC, VT))
5078           return S;
5079         if (!R.second)
5080           R = S;
5081       }
5082     }
5083   }
5084 
5085   return R;
5086 }
5087 
5088 //===----------------------------------------------------------------------===//
5089 // Constraint Selection.
5090 
5091 /// Return true of this is an input operand that is a matching constraint like
5092 /// "4".
5093 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5094   assert(!ConstraintCode.empty() && "No known constraint!");
5095   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5096 }
5097 
5098 /// If this is an input matching constraint, this method returns the output
5099 /// operand it matches.
5100 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5101   assert(!ConstraintCode.empty() && "No known constraint!");
5102   return atoi(ConstraintCode.c_str());
5103 }
5104 
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple direct outputs are returned as struct elements; ResNo
        // selects this output's element.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Derive ConstraintVT from the IR type of the operand value, if any.
    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands carry their pointee type in the 'elementtype'
        // parameter attribute.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        // Pointers are modeled as integers of pointer width.
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }

      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One operand cannot be satisfied by this alternative at all;
            // discard the whole alternative.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        // Tied operands must agree on integer-vs-FP and resolve to the
        // same register class.
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5290 
5291 /// Return an integer indicating how general CT is.
5292 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
5293   switch (CT) {
5294   case TargetLowering::C_Immediate:
5295   case TargetLowering::C_Other:
5296   case TargetLowering::C_Unknown:
5297     return 0;
5298   case TargetLowering::C_Register:
5299     return 1;
5300   case TargetLowering::C_RegisterClass:
5301     return 2;
5302   case TargetLowering::C_Memory:
5303   case TargetLowering::C_Address:
5304     return 3;
5305   }
5306   llvm_unreachable("Invalid constraint type");
5307 }
5308 
5309 /// Examine constraint type and operand type and determine a weight value.
5310 /// This object must already have been set up with the operand type
5311 /// and the current alternative constraint selected.
5312 TargetLowering::ConstraintWeight
5313   TargetLowering::getMultipleConstraintMatchWeight(
5314     AsmOperandInfo &info, int maIndex) const {
5315   InlineAsm::ConstraintCodeVector *rCodes;
5316   if (maIndex >= (int)info.multipleAlternatives.size())
5317     rCodes = &info.Codes;
5318   else
5319     rCodes = &info.multipleAlternatives[maIndex].Codes;
5320   ConstraintWeight BestWeight = CW_Invalid;
5321 
5322   // Loop over the options, keeping track of the most general one.
5323   for (const std::string &rCode : *rCodes) {
5324     ConstraintWeight weight =
5325         getSingleConstraintMatchWeight(info, rCode.c_str());
5326     if (weight > BestWeight)
5327       BestWeight = weight;
5328   }
5329 
5330   return BestWeight;
5331 }
5332 
5333 /// Examine constraint type and operand type and determine a weight value.
5334 /// This object must already have been set up with the operand type
5335 /// and the current alternative constraint selected.
5336 TargetLowering::ConstraintWeight
5337   TargetLowering::getSingleConstraintMatchWeight(
5338     AsmOperandInfo &info, const char *constraint) const {
5339   ConstraintWeight weight = CW_Invalid;
5340   Value *CallOperandVal = info.CallOperandVal;
5341     // If we don't have a value, we can't do a match,
5342     // but allow it at the lowest weight.
5343   if (!CallOperandVal)
5344     return CW_Default;
5345   // Look at the constraint type.
5346   switch (*constraint) {
5347     case 'i': // immediate integer.
5348     case 'n': // immediate integer with a known value.
5349       if (isa<ConstantInt>(CallOperandVal))
5350         weight = CW_Constant;
5351       break;
5352     case 's': // non-explicit intregal immediate.
5353       if (isa<GlobalValue>(CallOperandVal))
5354         weight = CW_Constant;
5355       break;
5356     case 'E': // immediate float if host format.
5357     case 'F': // immediate float.
5358       if (isa<ConstantFP>(CallOperandVal))
5359         weight = CW_Constant;
5360       break;
5361     case '<': // memory operand with autodecrement.
5362     case '>': // memory operand with autoincrement.
5363     case 'm': // memory operand.
5364     case 'o': // offsettable memory operand
5365     case 'V': // non-offsettable memory operand
5366       weight = CW_Memory;
5367       break;
5368     case 'r': // general register.
5369     case 'g': // general register, memory operand or immediate integer.
5370               // note: Clang converts "g" to "imr".
5371       if (CallOperandVal->getType()->isIntegerTy())
5372         weight = CW_Register;
5373       break;
5374     case 'X': // any operand.
5375   default:
5376     weight = CW_Default;
5377     break;
5378   }
5379   return weight;
5380 }
5381 
5382 /// If there are multiple different constraints that we could pick for this
5383 /// operand (e.g. "imr") try to pick the 'best' one.
5384 /// This is somewhat tricky: constraints fall into four classes:
5385 ///    Other         -> immediates and magic values
5386 ///    Register      -> one specific register
5387 ///    RegisterClass -> a group of regs
5388 ///    Memory        -> memory
5389 /// Ideally, we would pick the most specific constraint possible: if we have
5390 /// something that fits into a register, we would pick it.  The problem here
5391 /// is that if we have something that could either be in a register or in
5392 /// memory that use of the register could cause selection of *other*
5393 /// operands to fail: they might only succeed if we pick memory.  Because of
5394 /// this the heuristic we use is:
5395 ///
5396 ///  1) If there is an 'other' constraint, and if the operand is valid for
5397 ///     that constraint, use it.  This makes us take advantage of 'i'
5398 ///     constraints when available.
5399 ///  2) Otherwise, pick the most general constraint present.  This prefers
5400 ///     'm' over 'r', for example.
5401 ///
5402 static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
5403                              const TargetLowering &TLI,
5404                              SDValue Op, SelectionDAG *DAG) {
5405   assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
5406   unsigned BestIdx = 0;
5407   TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
5408   int BestGenerality = -1;
5409 
5410   // Loop over the options, keeping track of the most general one.
5411   for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
5412     TargetLowering::ConstraintType CType =
5413       TLI.getConstraintType(OpInfo.Codes[i]);
5414 
5415     // Indirect 'other' or 'immediate' constraints are not allowed.
5416     if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
5417                                CType == TargetLowering::C_Register ||
5418                                CType == TargetLowering::C_RegisterClass))
5419       continue;
5420 
5421     // If this is an 'other' or 'immediate' constraint, see if the operand is
5422     // valid for it. For example, on X86 we might have an 'rI' constraint. If
5423     // the operand is an integer in the range [0..31] we want to use I (saving a
5424     // load of a register), otherwise we must use 'r'.
5425     if ((CType == TargetLowering::C_Other ||
5426          CType == TargetLowering::C_Immediate) && Op.getNode()) {
5427       assert(OpInfo.Codes[i].size() == 1 &&
5428              "Unhandled multi-letter 'other' constraint");
5429       std::vector<SDValue> ResultOps;
5430       TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
5431                                        ResultOps, *DAG);
5432       if (!ResultOps.empty()) {
5433         BestType = CType;
5434         BestIdx = i;
5435         break;
5436       }
5437     }
5438 
5439     // Things with matching constraints can only be registers, per gcc
5440     // documentation.  This mainly affects "g" constraints.
5441     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
5442       continue;
5443 
5444     // This constraint letter is more general than the previous one, use it.
5445     int Generality = getConstraintGenerality(CType);
5446     if (Generality > BestGenerality) {
5447       BestType = CType;
5448       BestIdx = i;
5449       BestGenerality = Generality;
5450     }
5451   }
5452 
5453   OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
5454   OpInfo.ConstraintType = BestType;
5455 }
5456 
5457 /// Determines the constraint code and constraint type to use for the specific
5458 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
5459 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
5460                                             SDValue Op,
5461                                             SelectionDAG *DAG) const {
5462   assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
5463 
5464   // Single-letter constraints ('r') are very common.
5465   if (OpInfo.Codes.size() == 1) {
5466     OpInfo.ConstraintCode = OpInfo.Codes[0];
5467     OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
5468   } else {
5469     ChooseConstraint(OpInfo, *this, Op, DAG);
5470   }
5471 
5472   // 'X' matches anything.
5473   if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
5474     // Constants are handled elsewhere.  For Functions, the type here is the
5475     // type of the result, which is not what we want to look at; leave them
5476     // alone.
5477     Value *v = OpInfo.CallOperandVal;
5478     if (isa<ConstantInt>(v) || isa<Function>(v)) {
5479       return;
5480     }
5481 
5482     if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
5483       OpInfo.ConstraintCode = "i";
5484       return;
5485     }
5486 
5487     // Otherwise, try to resolve it to something we know about by looking at
5488     // the actual operand type.
5489     if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
5490       OpInfo.ConstraintCode = Repl;
5491       OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
5492     }
5493   }
5494 }
5495 
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // Set when any divisor element is even and thus needs a pre-shift.
  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // For each constant divisor d = 2^Shift * Odd, record Shift and the
  // multiplicative inverse of Odd (mod 2^BitWidth). A zero divisor aborts
  // the transform by returning false.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    // Each iteration doubles the number of correct low-order bits.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Materialize the per-element shifts/factors in the same shape as Op1
  // (build vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    // The shift is exact because the overall division is exact.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  // Multiply by the inverse of the odd part to complete the division.
  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
5563 
5564 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
5565                               SelectionDAG &DAG,
5566                               SmallVectorImpl<SDNode *> &Created) const {
5567   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5568   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5569   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
5570     return SDValue(N, 0); // Lower SDIV as SDIV
5571   return SDValue();
5572 }
5573 
5574 SDValue
5575 TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
5576                               SelectionDAG &DAG,
5577                               SmallVectorImpl<SDNode *> &Created) const {
5578   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5579   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5580   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
5581     return SDValue(N, 0); // Lower SREM as SREM
5582   return SDValue();
5583 }
5584 
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Wider type used for the multiply when VT itself is not legal.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    // Need at least double-width so the high half of the product exists.
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // For each constant divisor, compute the magic multiplier, a numerator
  // add/subtract factor, the post-multiply shift amount, and the sign-fixup
  // mask. Returns false for a zero divisor, aborting the transform.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Materialize the per-element values in the same shape as N1 (build
  // vector, splat, or scalar).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high half of the signed product, or an empty SDValue if no
  // suitable multiply operation is available.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      // Shift the full product down to extract its high half.
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      // Result 1 of SMUL_LOHI is the high half.
      return SDValue(LoHi.getNode(), 1);
    }
    return SDValue();
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
5732 
5733 /// Given an ISD::UDIV node expressing a divide by constant,
5734 /// return a DAG expression to select that will generate the same value by
5735 /// multiplying by a magic number.
5736 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
5737 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
5738                                   bool IsAfterLegalization,
5739                                   SmallVectorImpl<SDNode *> &Created) const {
5740   SDLoc dl(N);
5741   EVT VT = N->getValueType(0);
5742   EVT SVT = VT.getScalarType();
5743   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5744   EVT ShSVT = ShVT.getScalarType();
5745   unsigned EltBits = VT.getScalarSizeInBits();
5746   EVT MulVT;
5747 
5748   // Check to see if we can do this.
5749   // FIXME: We should be more aggressive here.
5750   if (!isTypeLegal(VT)) {
5751     // Limit this to simple scalars for now.
5752     if (VT.isVector() || !VT.isSimple())
5753       return SDValue();
5754 
5755     // If this type will be promoted to a large enough type with a legal
5756     // multiply operation, we can go ahead and do this transform.
5757     if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
5758       return SDValue();
5759 
5760     MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
5761     if (MulVT.getSizeInBits() < (2 * EltBits) ||
5762         !isOperationLegal(ISD::MUL, MulVT))
5763       return SDValue();
5764   }
5765 
5766   bool UseNPQ = false;
5767   SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5768 
5769   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
5770     if (C->isZero())
5771       return false;
5772     // FIXME: We should use a narrower constant when the upper
5773     // bits are known to be zero.
5774     const APInt& Divisor = C->getAPIntValue();
5775     UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor);
5776     unsigned PreShift = 0, PostShift = 0;
5777 
5778     // If the divisor is even, we can avoid using the expensive fixup by
5779     // shifting the divided value upfront.
5780     if (magics.IsAdd != 0 && !Divisor[0]) {
5781       PreShift = Divisor.countTrailingZeros();
5782       // Get magic number for the shifted divisor.
5783       magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
5784       assert(magics.IsAdd == 0 && "Should use cheap fixup now");
5785     }
5786 
5787     APInt Magic = magics.Magic;
5788 
5789     unsigned SelNPQ;
5790     if (magics.IsAdd == 0 || Divisor.isOne()) {
5791       assert(magics.ShiftAmount < Divisor.getBitWidth() &&
5792              "We shouldn't generate an undefined shift!");
5793       PostShift = magics.ShiftAmount;
5794       SelNPQ = false;
5795     } else {
5796       PostShift = magics.ShiftAmount - 1;
5797       SelNPQ = true;
5798     }
5799 
5800     PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
5801     MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
5802     NPQFactors.push_back(
5803         DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5804                                : APInt::getZero(EltBits),
5805                         dl, SVT));
5806     PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
5807     UseNPQ |= SelNPQ;
5808     return true;
5809   };
5810 
5811   SDValue N0 = N->getOperand(0);
5812   SDValue N1 = N->getOperand(1);
5813 
5814   // Collect the shifts/magic values from each element.
5815   if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
5816     return SDValue();
5817 
5818   SDValue PreShift, PostShift, MagicFactor, NPQFactor;
5819   if (N1.getOpcode() == ISD::BUILD_VECTOR) {
5820     PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
5821     MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
5822     NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
5823     PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
5824   } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
5825     assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
5826            NPQFactors.size() == 1 && PostShifts.size() == 1 &&
5827            "Expected matchUnaryPredicate to return one for scalable vectors");
5828     PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
5829     MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
5830     NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
5831     PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
5832   } else {
5833     assert(isa<ConstantSDNode>(N1) && "Expected a constant");
5834     PreShift = PreShifts[0];
5835     MagicFactor = MagicFactors[0];
5836     PostShift = PostShifts[0];
5837   }
5838 
5839   SDValue Q = N0;
5840   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
5841   Created.push_back(Q.getNode());
5842 
5843   // FIXME: We should support doing a MUL in a wider type.
5844   auto GetMULHU = [&](SDValue X, SDValue Y) {
5845     // If the type isn't legal, use a wider mul of the the type calculated
5846     // earlier.
5847     if (!isTypeLegal(VT)) {
5848       X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
5849       Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
5850       Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
5851       Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
5852                       DAG.getShiftAmountConstant(EltBits, MulVT, dl));
5853       return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
5854     }
5855 
5856     if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
5857       return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
5858     if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
5859       SDValue LoHi =
5860           DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
5861       return SDValue(LoHi.getNode(), 1);
5862     }
5863     return SDValue(); // No mulhu or equivalent
5864   };
5865 
5866   // Multiply the numerator (operand 0) by the magic value.
5867   Q = GetMULHU(Q, MagicFactor);
5868   if (!Q)
5869     return SDValue();
5870 
5871   Created.push_back(Q.getNode());
5872 
5873   if (UseNPQ) {
5874     SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
5875     Created.push_back(NPQ.getNode());
5876 
5877     // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5878     // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
5879     if (VT.isVector())
5880       NPQ = GetMULHU(NPQ, NPQFactor);
5881     else
5882       NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
5883 
5884     Created.push_back(NPQ.getNode());
5885 
5886     Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
5887     Created.push_back(Q.getNode());
5888   }
5889 
5890   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
5891   Created.push_back(Q.getNode());
5892 
5893   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5894 
5895   SDValue One = DAG.getConstant(1, dl, VT);
5896   SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
5897   return DAG.getSelect(dl, VT, IsOne, N0, Q);
5898 }
5899 
5900 /// If all values in Values that *don't* match the predicate are same 'splat'
5901 /// value, then replace all values with that splat value.
5902 /// Else, if AlternativeReplacement was provided, then replace all values that
5903 /// do match predicate with AlternativeReplacement value.
5904 static void
5905 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
5906                           std::function<bool(SDValue)> Predicate,
5907                           SDValue AlternativeReplacement = SDValue()) {
5908   SDValue Replacement;
5909   // Is there a value for which the Predicate does *NOT* match? What is it?
5910   auto SplatValue = llvm::find_if_not(Values, Predicate);
5911   if (SplatValue != Values.end()) {
5912     // Does Values consist only of SplatValue's and values matching Predicate?
5913     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
5914           return Value == *SplatValue || Predicate(Value);
5915         })) // Then we shall replace values matching predicate with SplatValue.
5916       Replacement = *SplatValue;
5917   }
5918   if (!Replacement) {
5919     // Oops, we did not find the "baseline" splat value.
5920     if (!AlternativeReplacement)
5921       return; // Nothing to do.
5922     // Let's replace with provided value then.
5923     Replacement = AlternativeReplacement;
5924   }
5925   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
5926 }
5927 
5928 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
5929 /// where the divisor is constant and the comparison target is zero,
5930 /// return a DAG expression that will generate the same comparison result
5931 /// using only multiplications, additions and shifts/rotations.
5932 /// Ref: "Hacker's Delight" 10-17.
5933 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
5934                                         SDValue CompTargetNode,
5935                                         ISD::CondCode Cond,
5936                                         DAGCombinerInfo &DCI,
5937                                         const SDLoc &DL) const {
5938   SmallVector<SDNode *, 5> Built;
5939   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5940                                          DCI, DL, Built)) {
5941     for (SDNode *N : Built)
5942       DCI.AddToWorklist(N);
5943     return Folded;
5944   }
5945 
5946   return SDValue();
5947 }
5948 
5949 SDValue
5950 TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
5951                                   SDValue CompTargetNode, ISD::CondCode Cond,
5952                                   DAGCombinerInfo &DCI, const SDLoc &DL,
5953                                   SmallVectorImpl<SDNode *> &Created) const {
5954   // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
5955   // - D must be constant, with D = D0 * 2^K where D0 is odd
5956   // - P is the multiplicative inverse of D0 modulo 2^W
5957   // - Q = floor(((2^W) - 1) / D)
5958   // where W is the width of the common type of N and D.
5959   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5960          "Only applicable for (in)equality comparisons.");
5961 
5962   SelectionDAG &DAG = DCI.DAG;
5963 
5964   EVT VT = REMNode.getValueType();
5965   EVT SVT = VT.getScalarType();
5966   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
5967   EVT ShSVT = ShVT.getScalarType();
5968 
5969   // If MUL is unavailable, we cannot proceed in any case.
5970   if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
5971     return SDValue();
5972 
5973   bool ComparingWithAllZeros = true;
5974   bool AllComparisonsWithNonZerosAreTautological = true;
5975   bool HadTautologicalLanes = false;
5976   bool AllLanesAreTautological = true;
5977   bool HadEvenDivisor = false;
5978   bool AllDivisorsArePowerOfTwo = true;
5979   bool HadTautologicalInvertedLanes = false;
5980   SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
5981 
5982   auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
5983     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
5984     if (CDiv->isZero())
5985       return false;
5986 
5987     const APInt &D = CDiv->getAPIntValue();
5988     const APInt &Cmp = CCmp->getAPIntValue();
5989 
5990     ComparingWithAllZeros &= Cmp.isZero();
5991 
5992     // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
5993     // if C2 is not less than C1, the comparison is always false.
5994     // But we will only be able to produce the comparison that will give the
5995     // opposive tautological answer. So this lane would need to be fixed up.
5996     bool TautologicalInvertedLane = D.ule(Cmp);
5997     HadTautologicalInvertedLanes |= TautologicalInvertedLane;
5998 
5999     // If all lanes are tautological (either all divisors are ones, or divisor
6000     // is not greater than the constant we are comparing with),
6001     // we will prefer to avoid the fold.
6002     bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6003     HadTautologicalLanes |= TautologicalLane;
6004     AllLanesAreTautological &= TautologicalLane;
6005 
6006     // If we are comparing with non-zero, we need'll need  to subtract said
6007     // comparison value from the LHS. But there is no point in doing that if
6008     // every lane where we are comparing with non-zero is tautological..
6009     if (!Cmp.isZero())
6010       AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6011 
6012     // Decompose D into D0 * 2^K
6013     unsigned K = D.countTrailingZeros();
6014     assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6015     APInt D0 = D.lshr(K);
6016 
6017     // D is even if it has trailing zeros.
6018     HadEvenDivisor |= (K != 0);
6019     // D is a power-of-two if D0 is one.
6020     // If all divisors are power-of-two, we will prefer to avoid the fold.
6021     AllDivisorsArePowerOfTwo &= D0.isOne();
6022 
6023     // P = inv(D0, 2^W)
6024     // 2^W requires W + 1 bits, so we have to extend and then truncate.
6025     unsigned W = D.getBitWidth();
6026     APInt P = D0.zext(W + 1)
6027                   .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
6028                   .trunc(W);
6029     assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
6030     assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6031 
6032     // Q = floor((2^W - 1) u/ D)
6033     // R = ((2^W - 1) u% D)
6034     APInt Q, R;
6035     APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
6036 
6037     // If we are comparing with zero, then that comparison constant is okay,
6038     // else it may need to be one less than that.
6039     if (Cmp.ugt(R))
6040       Q -= 1;
6041 
6042     assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6043            "We are expecting that K is always less than all-ones for ShSVT");
6044 
6045     // If the lane is tautological the result can be constant-folded.
6046     if (TautologicalLane) {
6047       // Set P and K amount to a bogus values so we can try to splat them.
6048       P = 0;
6049       K = -1;
6050       // And ensure that comparison constant is tautological,
6051       // it will always compare true/false.
6052       Q = -1;
6053     }
6054 
6055     PAmts.push_back(DAG.getConstant(P, DL, SVT));
6056     KAmts.push_back(
6057         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6058     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6059     return true;
6060   };
6061 
6062   SDValue N = REMNode.getOperand(0);
6063   SDValue D = REMNode.getOperand(1);
6064 
6065   // Collect the values from each element.
6066   if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6067     return SDValue();
6068 
6069   // If all lanes are tautological, the result can be constant-folded.
6070   if (AllLanesAreTautological)
6071     return SDValue();
6072 
6073   // If this is a urem by a powers-of-two, avoid the fold since it can be
6074   // best implemented as a bit test.
6075   if (AllDivisorsArePowerOfTwo)
6076     return SDValue();
6077 
6078   SDValue PVal, KVal, QVal;
6079   if (D.getOpcode() == ISD::BUILD_VECTOR) {
6080     if (HadTautologicalLanes) {
6081       // Try to turn PAmts into a splat, since we don't care about the values
6082       // that are currently '0'. If we can't, just keep '0'`s.
6083       turnVectorIntoSplatVector(PAmts, isNullConstant);
6084       // Try to turn KAmts into a splat, since we don't care about the values
6085       // that are currently '-1'. If we can't, change them to '0'`s.
6086       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
6087                                 DAG.getConstant(0, DL, ShSVT));
6088     }
6089 
6090     PVal = DAG.getBuildVector(VT, DL, PAmts);
6091     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6092     QVal = DAG.getBuildVector(VT, DL, QAmts);
6093   } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6094     assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6095            "Expected matchBinaryPredicate to return one element for "
6096            "SPLAT_VECTORs");
6097     PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6098     KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6099     QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6100   } else {
6101     PVal = PAmts[0];
6102     KVal = KAmts[0];
6103     QVal = QAmts[0];
6104   }
6105 
6106   if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6107     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6108       return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6109     assert(CompTargetNode.getValueType() == N.getValueType() &&
6110            "Expecting that the types on LHS and RHS of comparisons match.");
6111     N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6112   }
6113 
6114   // (mul N, P)
6115   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6116   Created.push_back(Op0.getNode());
6117 
6118   // Rotate right only if any divisor was even. We avoid rotates for all-odd
6119   // divisors as a performance improvement, since rotating by 0 is a no-op.
6120   if (HadEvenDivisor) {
6121     // We need ROTR to do this.
6122     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6123       return SDValue();
6124     // UREM: (rotr (mul N, P), K)
6125     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6126     Created.push_back(Op0.getNode());
6127   }
6128 
6129   // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6130   SDValue NewCC =
6131       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6132                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6133   if (!HadTautologicalInvertedLanes)
6134     return NewCC;
6135 
6136   // If any lanes previously compared always-false, the NewCC will give
6137   // always-true result for them, so we need to fixup those lanes.
6138   // Or the other way around for inequality predicate.
6139   assert(VT.isVector() && "Can/should only get here for vectors.");
6140   Created.push_back(NewCC.getNode());
6141 
6142   // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6143   // if C2 is not less than C1, the comparison is always false.
6144   // But we have produced the comparison that will give the
6145   // opposive tautological answer. So these lanes would need to be fixed up.
6146   SDValue TautologicalInvertedChannels =
6147       DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6148   Created.push_back(TautologicalInvertedChannels.getNode());
6149 
6150   // NOTE: we avoid letting illegal types through even if we're before legalize
6151   // ops – legalization has a hard time producing good code for this.
6152   if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6153     // If we have a vector select, let's replace the comparison results in the
6154     // affected lanes with the correct tautological result.
6155     SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6156                                               DL, SETCCVT, SETCCVT);
6157     return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6158                        Replacement, NewCC);
6159   }
6160 
6161   // Else, we can just invert the comparison result in the appropriate lanes.
6162   //
6163   // NOTE: see the note above VSELECT above.
6164   if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6165     return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6166                        TautologicalInvertedChannels);
6167 
6168   return SDValue(); // Don't know how to lower.
6169 }
6170 
6171 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6172 /// where the divisor is constant and the comparison target is zero,
6173 /// return a DAG expression that will generate the same comparison result
6174 /// using only multiplications, additions and shifts/rotations.
6175 /// Ref: "Hacker's Delight" 10-17.
6176 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6177                                         SDValue CompTargetNode,
6178                                         ISD::CondCode Cond,
6179                                         DAGCombinerInfo &DCI,
6180                                         const SDLoc &DL) const {
6181   SmallVector<SDNode *, 7> Built;
6182   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6183                                          DCI, DL, Built)) {
6184     assert(Built.size() <= 7 && "Max size prediction failed.");
6185     for (SDNode *N : Built)
6186       DCI.AddToWorklist(N);
6187     return Folded;
6188   }
6189 
6190   return SDValue();
6191 }
6192 
/// Implementation of the SREM-by-constant (in)equality fold: appends any
/// newly built nodes to \p Created and returns the replacement comparison,
/// or an empty SDValue when the fold does not apply.
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();

  // If we are after ops legalization, and MUL is unavailable, we can not
  // proceed.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isZero())
    return SDValue();

  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  // Build the P/A/K/Q constants for one divisor lane. Returns false to abort
  // the whole transform.
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isZero())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`

    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOne();
    AllDivisorsAreOnes &= D.isOne();

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOne()) {
      // Set P, A and K to bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, AVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
           QAmts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    assert(isa<ConstantSDNode>(D) && "Expected a constant");
    // Scalar case: a single constant divisor.
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();

    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops -- legalization has a hard time producing good code for the code that
  // follows.
  if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isOperationLegalOrCustom(Cond, VT) ||
      !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
                                MaskedIsZero, Fold);

  return Blended;
}
6445 
6446 bool TargetLowering::
6447 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
6448   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
6449     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
6450                                 "be a constant integer");
6451     return true;
6452   }
6453 
6454   return false;
6455 }
6456 
6457 SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
6458                                          const DenormalMode &Mode) const {
6459   SDLoc DL(Op);
6460   EVT VT = Op.getValueType();
6461   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6462   SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
6463   // Testing it with denormal inputs to avoid wrong estimate.
6464   if (Mode.Input == DenormalMode::IEEE) {
6465     // This is specifically a check for the handling of denormal inputs,
6466     // not the result.
6467 
6468     // Test = fabs(X) < SmallestNormal
6469     const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
6470     APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
6471     SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
6472     SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
6473     return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
6474   }
6475   // Test = X == 0.0
6476   return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
6477 }
6478 
/// Try to produce an expression equivalent to (fneg Op), reporting through
/// \p Cost whether the negated form is cheaper than, as cheap as, or more
/// expensive than materializing an explicit fneg. Returns an empty SDValue
/// when no negated form is produced.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Drop a speculatively-built negated operand again if nothing ended up
  // using it.
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Rebuild the vector with every (non-undef) element sign-flipped.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      // The unused alternative may now be dead; clean it up.
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    // Negating either operand of a multiply/divide negates the result.
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // The addend Z must be negatable in every fold below.
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // fneg(fp_extend X) -> fp_extend(fneg X); fneg(fsin X) -> fsin(fneg X).
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    // fneg(fp_round X) -> fp_round(fneg X), keeping the original second
    // operand of the FP_ROUND node.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  }

  return SDValue();
}
6747 
6748 //===----------------------------------------------------------------------===//
6749 // Legalization Utilities
6750 //===----------------------------------------------------------------------===//
6751 
/// Expand Opcode (ISD::MUL or ISD::[SU]MUL_LOHI) on full-width VT operands
/// LHS/RHS into operations on the half-width type HiLoVT, appending the
/// pieces of the product (low part first) to Result. Returns false when no
/// suitable half-width multiply (MULH[SU] or [SU]MUL_LOHI) is available.
/// LL/LH/RL/RH optionally supply pre-split halves of the operands.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Determine which half-width multiply forms we are allowed to emit.
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Emit a half-width multiply of L and R producing both halves of the
  // product, preferring a fused [SU]MUL_LOHI over separate MUL + MULH[SU].
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Split off the low halves by truncation if the caller didn't supply them.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // For the *MUL_LOHI opcodes the upper halves of the double-wide
        // result are known zero.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeNumSignBits(LHS) > InnerBitSize &&
      DAG.ComputeNumSignBits(RHS) > InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);

  // Extract the high halves by shift + truncate if the caller didn't supply
  // them.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // Low partial product: LL * RL gives the low result half directly.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // Truncating multiply: high half is Hi(LL*RL) + LL*RH + LH*RL; the LH*RH
    // term only affects bits beyond the result width.
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  // Merge reassembles a (Lo, Hi) half pair into one full-width value.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Propagate the carry with the glued ADDC/ADDE chain when the target has
  // it, otherwise with ADDCARRY and an explicit boolean carry value.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Signed correction of the unsigned partial products: when LH (resp. RH)
    // is negative, subtract zext(RL) (resp. zext(LL)) from the running high
    // part.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
6927 
6928 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
6929                                SelectionDAG &DAG, MulExpansionKind Kind,
6930                                SDValue LL, SDValue LH, SDValue RL,
6931                                SDValue RH) const {
6932   SmallVector<SDValue, 2> Result;
6933   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
6934                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
6935                            DAG, Kind, LL, LH, RL, RH);
6936   if (Ok) {
6937     assert(Result.size() == 2);
6938     Lo = Result[0];
6939     Hi = Result[1];
6940   }
6941   return Ok;
6942 }
6943 
6944 // Check that (every element of) Z is undef or not an exact multiple of BW.
6945 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
6946   return ISD::matchUnaryPredicate(
6947       Z,
6948       [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
6949       true);
6950 }
6951 
/// Expand ISD::FSHL/FSHR into target-supported shifts and ORs, or into the
/// opposite-direction funnel shift when that opcode is better supported.
/// Returns an empty SDValue when the expansion is not possible.
SDValue TargetLowering::expandFunnelShift(SDNode *Node,
                                          SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);

  // For vectors, bail out unless every building block of the fallback
  // expansion is available.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();

  // If a funnel shift in the other direction is more supported, use it.
  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
      // fshl X, Y, Z -> fshr X, Y, -Z
      // fshr X, Y, Z -> fshl X, Y, -Z
      SDValue Zero = DAG.getConstant(0, DL, ShVT);
      // NOTE(review): this SUB is created with result type VT while Zero/Z
      // have type ShVT; it relies on ShVT == VT on this path -- confirm.
      Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
    } else {
      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
      SDValue One = DAG.getConstant(1, DL, ShVT);
      if (IsFSHL) {
        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
      } else {
        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
      }
      Z = DAG.getNOT(DL, Z, ShVT);
    }
    return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
  }

  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    // The extra shift-by-one keeps the second shift amount below BW when
    // Z % BW == 0.
    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
    }
  }
  return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}
7036 
// TODO: Merge with expandFunnelShift.
/// Expand ISD::ROTL/ROTR into shift/and/or operations, or into the
/// opposite-direction rotate when that opcode is better supported. Returns
/// an empty SDValue when the expansion is not possible.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                  SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, ShVT);

  // If a rotate in the other direction is more supported, use it.
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    return DAG.getNode(RevRot, DL, VT, Op0, Sub);
  }

  // For vectors, unless explicitly allowed, only expand when every building
  // block of the fallback expansion is available.
  if (!AllowVectorOps && VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SHL, VT) ||
       !isOperationLegalOrCustom(ISD::SRL, VT) ||
       !isOperationLegalOrCustom(ISD::SUB, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return SDValue();

  // ShOpc shifts in the rotate direction; HsOpc brings the wrapped-around
  // bits back from the other side.
  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
  SDValue ShVal;
  SDValue HsVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
    HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    // The extra shift-by-one keeps the second shift amount below w when
    // c % w == 0.
    SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
    SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
    SDValue One = DAG.getConstant(1, DL, ShVT);
    HsVal =
        DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
  }
  return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
}
7092 
/// Expand ISD::{SHL,SRL,SRA}_PARTS (a shift of a value split across two
/// part-sized operands) into FSHL/FSHR plus selects that handle shift
/// amounts of at least one part width. The resulting parts are returned
/// through Lo and Hi.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0);
  SDValue ShOpHi = Node->getOperand(1);
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Tmp1 is the value shifted in from beyond the pair: the replicated sign
  // bit of the high part for SRA, zero otherwise.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  // Tmp2 combines bits of both parts via a funnel shift; Tmp3 is the plain
  // single-part shift of the remaining operand.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
7144 
/// Expand FP_TO_SINT by manipulating the float's raw integer bits. Currently
/// limited to the f32 -> i64 (non-strict) case. On success the expanded value
/// is returned through Result.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field constants: 8 exponent bits starting at
  // bit 23, exponent bias 127, 23 explicit mantissa bits.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as its raw integer bit pattern.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign = all-ones when negative, all-zeros otherwise (arithmetic shift of
  // the isolated sign bit), extended to the destination width.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // R = mantissa with the implicit leading one bit (0x00800000) restored.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand by the exponent: shift left when the exponent
  // exceeds the mantissa width (23), otherwise shift right.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones and is
  // a no-op when Sign is zero.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |Src| < 1, so the result is zero.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
7215 
/// Expand (STRICT_)FP_TO_UINT in terms of (STRICT_)FP_TO_SINT.
/// Inputs below 2^(DstBits-1) convert directly with a signed conversion;
/// larger inputs are offset down by 2^(DstBits-1) before the signed
/// conversion and the sign bit is XORed back into the integer result.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // Strict FP nodes carry their chain as operand 0; the FP source follows.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  // Cst holds 2^(DstBits-1) as a floating-point value (the rounded
  // conversion of SignMask from above); Sel is true when Src is small
  // enough for a plain signed conversion.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    // Signaling compare so a NaN input raises the FP-invalid exception.
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      // Thread the chain through the FSUB and the conversion so exception
      // ordering is preserved.
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    // The offset add is done as XOR: the in-range signed result has a clear
    // sign bit, so XOR with the sign mask is equivalent to adding it.
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
7317 
7318 bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
7319                                       SDValue &Chain,
7320                                       SelectionDAG &DAG) const {
7321   // This transform is not correct for converting 0 when rounding mode is set
7322   // to round toward negative infinity which will produce -0.0. So disable under
7323   // strictfp.
7324   if (Node->isStrictFPOpcode())
7325     return false;
7326 
7327   SDValue Src = Node->getOperand(0);
7328   EVT SrcVT = Src.getValueType();
7329   EVT DstVT = Node->getValueType(0);
7330 
7331   if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
7332     return false;
7333 
7334   // Only expand vector types if we have the appropriate vector bit operations.
7335   if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
7336                            !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
7337                            !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
7338                            !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
7339                            !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
7340     return false;
7341 
7342   SDLoc dl(SDValue(Node, 0));
7343   EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
7344 
7345   // Implementation of unsigned i64 to f64 following the algorithm in
7346   // __floatundidf in compiler_rt.  This implementation performs rounding
7347   // correctly in all rounding modes with the exception of converting 0
7348   // when rounding toward negative infinity. In that case the fsub will produce
7349   // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
7350   SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
7351   SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
7352       BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
7353   SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
7354   SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
7355   SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
7356 
7357   SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
7358   SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
7359   SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
7360   SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
7361   SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
7362   SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
7363   SDValue HiSub =
7364       DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
7365   Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
7366   return true;
7367 }
7368 
7369 SDValue
7370 TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
7371                                                SelectionDAG &DAG) const {
7372   unsigned Opcode = Node->getOpcode();
7373   assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
7374           Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
7375          "Wrong opcode");
7376 
7377   if (Node->getFlags().hasNoNaNs()) {
7378     ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
7379     SDValue Op1 = Node->getOperand(0);
7380     SDValue Op2 = Node->getOperand(1);
7381     SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
7382     // Copy FMF flags, but always set the no-signed-zeros flag
7383     // as this is implied by the FMINNUM/FMAXNUM semantics.
7384     SDNodeFlags Flags = Node->getFlags();
7385     Flags.setNoSignedZeros(true);
7386     SelCC->setFlags(Flags);
7387     return SelCC;
7388   }
7389 
7390   return SDValue();
7391 }
7392 
/// Expand FMINNUM/FMAXNUM by lowering to FMINNUM_IEEE/FMAXNUM_IEEE (with
/// explicit sNaN quieting), to FMINIMUM/FMAXIMUM when NaNs are known
/// absent, or to a compare + select. Returns an empty SDValue if no
/// strategy applies.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  SDLoc dl(Node);
  // NOTE(review): any opcode other than FMINNUM (including strict variants,
  // should they ever reach here) maps to the "max" flavor — confirm callers
  // only pass non-strict FMINNUM/FMAXNUM.
  unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
    ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
  EVT VT = Node->getValueType(0);

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs.
  if (Node->getFlags().hasNoNaNs()) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
    }
  }

  // Last resort: a plain compare + select (valid only with no-NaNs flags).
  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
7440 
/// Expand an IS_FPCLASS test. \p Test is a mask of fcXXX class bits; the
/// result (in \p ResultVT) is true when the class of \p Op is in the mask.
/// Uses float compares when FP exceptions may be ignored, otherwise bitcasts
/// the value to an integer and tests the IEEE representation directly.
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                                         unsigned Test, SDNodeFlags Flags,
                                         const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());

  // Degenerated cases.
  if (Test == 0)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if ((Test & fcAllFlags) == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;
  if (unsigned InvertedCheck = getInvertedFPClassTest(Test)) {
    IsInverted = true;
    Test = InvertedCheck;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  // x87 extended precision has an explicit integer bit in the mantissa,
  // which several of the checks below must special-case.
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
      isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
    if (Test == fcZero)
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          IsInverted ? ISD::SETUNE : ISD::SETOEQ);
    if (Test == fcNan)
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInverted ? ISD::SETO : ISD::SETUO);
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnesValue(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);

  // Res accumulates the OR of all partial class checks.
  SDValue Res;
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  // Lazily-built check for the explicit f80 integer bit.
  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  // NOTE(review): the 0.0 literal is implicitly converted to integer 0 here;
  // reusing ZeroV would be clearer.
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0.0, DL, IntVT), ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  // No partial checks fired: the (possibly inverted) constant answer.
  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
  return Res;
}
7662 
7663 // Only expand vector types if we have the appropriate vector bit operations.
7664 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
7665   assert(VT.isVector() && "Expected vector type");
7666   unsigned Len = VT.getScalarSizeInBits();
7667   return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
7668          TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
7669          TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
7670          (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
7671          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
7672 }
7673 
/// Expand CTPOP via the parallel bit-summing algorithm: sum adjacent 1-bit
/// fields, then 2-bit fields, then 4-bit fields, and finally combine the
/// per-byte counts with shifts/adds or a multiply.
SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  // Splat constants: 0x55 = 01010101b, 0x33 = 00110011b, 0x0F = 00001111b.
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);

  // Each byte now holds its own popcount; a single byte is already done.
  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(ISD::AND, dl, VT,
                     DAG.getNode(ISD::ADD, dl, VT, Op,
                                 DAG.getNode(ISD::SRL, dl, VT, Op,
                                             DAG.getConstant(8, dl, ShVT))),
                     DAG.getConstant(0xFF, dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
  // The multiply sums all byte counts into the top byte; shift it down.
  SDValue Mask01 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
  return DAG.getNode(ISD::SRL, dl, VT,
                     DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
                     DAG.getConstant(Len - 8, dl, ShVT));
}
7740 
7741 SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
7742   SDLoc dl(Node);
7743   EVT VT = Node->getValueType(0);
7744   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7745   SDValue Op = Node->getOperand(0);
7746   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
7747 
7748   // If the non-ZERO_UNDEF version is supported we can use that instead.
7749   if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
7750       isOperationLegalOrCustom(ISD::CTLZ, VT))
7751     return DAG.getNode(ISD::CTLZ, dl, VT, Op);
7752 
7753   // If the ZERO_UNDEF version is supported use that and handle the zero case.
7754   if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
7755     EVT SetCCVT =
7756         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7757     SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
7758     SDValue Zero = DAG.getConstant(0, dl, VT);
7759     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
7760     return DAG.getSelect(dl, VT, SrcIsZero,
7761                          DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
7762   }
7763 
7764   // Only expand vector types if we have the appropriate vector bit operations.
7765   // This includes the operations needed to expand CTPOP if it isn't supported.
7766   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
7767                         (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
7768                          !canExpandVectorCTPOP(*this, VT)) ||
7769                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
7770                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
7771     return SDValue();
7772 
7773   // for now, we do this:
7774   // x = x | (x >> 1);
7775   // x = x | (x >> 2);
7776   // ...
7777   // x = x | (x >>16);
7778   // x = x | (x >>32); // for 64-bit input
7779   // return popcount(~x);
7780   //
7781   // Ref: "Hacker's Delight" by Henry Warren
7782   for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
7783     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
7784     Op = DAG.getNode(ISD::OR, dl, VT, Op,
7785                      DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
7786   }
7787   Op = DAG.getNOT(dl, Op, VT);
7788   return DAG.getNode(ISD::CTPOP, dl, VT, Op);
7789 }
7790 
/// Expand CTTZ/CTTZ_ZERO_UNDEF, preferring native (ZERO_UNDEF) variants and
/// otherwise reducing to CTPOP (or CTLZ) of ~x & (x - 1).
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT))
    return DAG.getNode(ISD::CTTZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // CTTZ of zero is undefined for the ZERO_UNDEF flavor, so substitute
    // the bit width explicitly.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // ~x & (x - 1) yields a mask of the trailing zeros (all-ones below the
  // lowest set bit; all-ones for x == 0, giving NumBitsPerElt).
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
  }

  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
7840 
7841 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
7842                                   bool IsNegative) const {
7843   SDLoc dl(N);
7844   EVT VT = N->getValueType(0);
7845   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7846   SDValue Op = N->getOperand(0);
7847 
7848   // abs(x) -> smax(x,sub(0,x))
7849   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
7850       isOperationLegal(ISD::SMAX, VT)) {
7851     SDValue Zero = DAG.getConstant(0, dl, VT);
7852     return DAG.getNode(ISD::SMAX, dl, VT, Op,
7853                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
7854   }
7855 
7856   // abs(x) -> umin(x,sub(0,x))
7857   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
7858       isOperationLegal(ISD::UMIN, VT)) {
7859     SDValue Zero = DAG.getConstant(0, dl, VT);
7860     Op = DAG.getFreeze(Op);
7861     return DAG.getNode(ISD::UMIN, dl, VT, Op,
7862                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
7863   }
7864 
7865   // 0 - abs(x) -> smin(x, sub(0,x))
7866   if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
7867       isOperationLegal(ISD::SMIN, VT)) {
7868     Op = DAG.getFreeze(Op);
7869     SDValue Zero = DAG.getConstant(0, dl, VT);
7870     return DAG.getNode(ISD::SMIN, dl, VT, Op,
7871                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
7872   }
7873 
7874   // Only expand vector types if we have the appropriate vector operations.
7875   if (VT.isVector() &&
7876       (!isOperationLegalOrCustom(ISD::SRA, VT) ||
7877        (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
7878        (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
7879        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
7880     return SDValue();
7881 
7882   Op = DAG.getFreeze(Op);
7883   SDValue Shift =
7884       DAG.getNode(ISD::SRA, dl, VT, Op,
7885                   DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
7886   SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
7887 
7888   // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
7889   if (!IsNegative)
7890     return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
7891 
7892   // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
7893   return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
7894 }
7895 
/// Expand ISD::BSWAP into a sequence of shifts, masks and ORs. Only i16,
/// i32 and i64 scalar types (including vectors of them) are handled; for
/// anything else SDValue() is returned so the caller can choose another
/// lowering strategy.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  // Extended (non-simple) types cannot be dispatched by the switch below.
  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    // Shift each byte towards its destination, mask off bits that crossed
    // into a neighbouring byte, then OR the partial results together.
    // TmpK supplies byte K-1 of the result (byte 0 = LSB).
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(0xFF0000, dl, VT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    // Same scheme with eight bytes; the final ORs are balanced as a tree.
    // TmpK supplies byte K-1 of the result (byte 0 = LSB).
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
                       DAG.getConstant(255ULL<<48, dl, VT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
                       DAG.getConstant(255ULL<<40, dl, VT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
                       DAG.getConstant(255ULL<<32, dl, VT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL<<8 , dl, VT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
7953 
/// Expand ISD::BITREVERSE. For power-of-two sizes of at least 8 bits this
/// byte-swaps first, then swaps nibbles, bit pairs and adjacent bits
/// within each byte using byte-splatted masks (a logarithmic number of
/// steps). Other sizes fall back to moving every bit individually.
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }

  // Fallback: build the result one bit at a time. Input bit I lands in
  // output bit J = Sz-1-I; shift it there, isolate it with a single-bit
  // mask, and OR it into the accumulator.
  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));

    APInt Shift(Sz, 1);
    Shift <<= J;
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }

  return Tmp;
}
8015 
/// Break a vector load into either a single wide integer load followed by
/// shift/mask extraction of each element (when elements are not
/// byte-sized) or one scalar (ext)load per element. Returns the resulting
/// vector value and the combined output chain.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    // Mask that isolates one element's bits once shifted to the bottom.
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 sits in the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
                                     LoadVT, SL, /*LegalTypes=*/false);
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      // Re-apply the requested extension (sext/zext/aext) per element.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: emit one scalar (ext)load per element.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // Tie the independent per-element load chains back together.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
8106 
/// Break a vector store into one truncating scalar store per element, or,
/// when the element type is not byte-sized, pack all elements into a
/// single integer of the vector's size and emit one store of that.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    // OR each truncated element into its bit position within CurrVal.
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 goes in the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  // The element stores are independent; merge their chains.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
8183 
/// Expand a load whose alignment the target cannot perform directly.
/// FP/vector loads are either scalarized, replaced by a same-sized integer
/// load plus a bitcast, or copied through an aligned stack slot; integer
/// loads are split into two half-width loads combined with shift+OR.
/// Returns {loaded value, output chain}.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  // Non-integer results cannot be split bitwise; handle them specially.
  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up so a partial trailing register still gets copied.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    // Little-endian: the low half is at the base address.
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    // Big-endian: the high half is at the base address.
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
8335 
/// Expand a store whose alignment the target cannot perform directly.
/// FP/vector stores become a bitcast-to-integer store, are scalarized, or
/// are bounced through an aligned stack slot; integer stores are split
/// into two half-width truncating stores. Returns the output chain.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  // Non-integer values cannot be split bitwise; handle them specially.
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up so a partial trailing register still gets copied.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount = DAG.getConstant(
      NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
  SDValue Lo = Val;
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts
  // The part at the base address is the low half on little-endian targets
  // and the high half on big-endian targets.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
8460 
8461 SDValue
8462 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
8463                                        const SDLoc &DL, EVT DataVT,
8464                                        SelectionDAG &DAG,
8465                                        bool IsCompressedMemory) const {
8466   SDValue Increment;
8467   EVT AddrVT = Addr.getValueType();
8468   EVT MaskVT = Mask.getValueType();
8469   assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
8470          "Incompatible types of Data and Mask");
8471   if (IsCompressedMemory) {
8472     if (DataVT.isScalableVector())
8473       report_fatal_error(
8474           "Cannot currently handle compressed memory with scalable vectors");
8475     // Incrementing the pointer according to number of '1's in the mask.
8476     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
8477     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
8478     if (MaskIntVT.getSizeInBits() < 32) {
8479       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
8480       MaskIntVT = MVT::i32;
8481     }
8482 
8483     // Count '1's with POPCNT.
8484     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
8485     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
8486     // Scale is an element size in bytes.
8487     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
8488                                     AddrVT);
8489     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
8490   } else if (DataVT.isScalableVector()) {
8491     Increment = DAG.getVScale(DL, AddrVT,
8492                               APInt(AddrVT.getFixedSizeInBits(),
8493                                     DataVT.getStoreSize().getKnownMinSize()));
8494   } else
8495     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
8496 
8497   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
8498 }
8499 
8500 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
8501                                        EVT VecVT, const SDLoc &dl,
8502                                        ElementCount SubEC) {
8503   assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
8504          "Cannot index a scalable vector within a fixed-width vector");
8505 
8506   unsigned NElts = VecVT.getVectorMinNumElements();
8507   unsigned NumSubElts = SubEC.getKnownMinValue();
8508   EVT IdxVT = Idx.getValueType();
8509 
8510   if (VecVT.isScalableVector() && !SubEC.isScalable()) {
8511     // If this is a constant index and we know the value plus the number of the
8512     // elements in the subvector minus one is less than the minimum number of
8513     // elements then it's safe to return Idx.
8514     if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
8515       if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
8516         return Idx;
8517     SDValue VS =
8518         DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
8519     unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
8520     SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
8521                               DAG.getConstant(NumSubElts, dl, IdxVT));
8522     return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
8523   }
8524   if (isPowerOf2_32(NElts) && NumSubElts == 1) {
8525     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
8526     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
8527                        DAG.getConstant(Imm, dl, IdxVT));
8528   }
8529   unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
8530   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
8531                      DAG.getConstant(MaxIndex, dl, IdxVT));
8532 }
8533 
8534 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
8535                                                 SDValue VecPtr, EVT VecVT,
8536                                                 SDValue Index) const {
8537   return getVectorSubVecPointer(
8538       DAG, VecPtr, VecVT,
8539       EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
8540       Index);
8541 }
8542 
8543 SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
8544                                                SDValue VecPtr, EVT VecVT,
8545                                                EVT SubVecVT,
8546                                                SDValue Index) const {
8547   SDLoc dl(Index);
8548   // Make sure the index type is big enough to compute in.
8549   Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
8550 
8551   EVT EltVT = VecVT.getVectorElementType();
8552 
8553   // Calculate the element offset and add it to the pointer.
8554   unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
8555   assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
8556          "Converting bits to bytes lost precision");
8557   assert(SubVecVT.getVectorElementType() == EltVT &&
8558          "Sub-vector must be a vector with matching element type");
8559   Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
8560                                   SubVecVT.getVectorElementCount());
8561 
8562   EVT IdxVT = Index.getValueType();
8563   if (SubVecVT.isScalableVector())
8564     Index =
8565         DAG.getNode(ISD::MUL, dl, IdxVT, Index,
8566                     DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
8567 
8568   Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
8569                       DAG.getConstant(EltSize, dl, IdxVT));
8570   return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
8571 }
8572 
8573 //===----------------------------------------------------------------------===//
8574 // Implementation of Emulated TLS Model
8575 //===----------------------------------------------------------------------===//
8576 
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to the address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  // The "__emutls_v.xyz" control variable must already exist in the module.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  // Build the single argument: the address of the "__emutls_v.<name>"
  // control variable for this TLS global.
  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Emit the call as a plain C-calling-convention libcall anchored to the
  // entry node (TLS address computation has no other chain dependencies).
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as a call. Inform MFI that this function has
  // calls. At least for X86 targets; maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  // Emulated TLS lowering cannot fold a nonzero offset into the call, so the
  // caller must have split it out beforehand.
  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
8613 
8614 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
8615                                                 SelectionDAG &DAG) const {
8616   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
8617   if (!isCtlzFast())
8618     return SDValue();
8619   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8620   SDLoc dl(Op);
8621   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
8622     if (C->isZero() && CC == ISD::SETEQ) {
8623       EVT VT = Op.getOperand(0).getValueType();
8624       SDValue Zext = Op.getOperand(0);
8625       if (VT.bitsLT(MVT::i32)) {
8626         VT = MVT::i32;
8627         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
8628       }
8629       unsigned Log2b = Log2_32(VT.getSizeInBits());
8630       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
8631       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
8632                                 DAG.getConstant(Log2b, dl, MVT::i32));
8633       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
8634     }
8635   }
8636   return SDValue();
8637 }
8638 
8639 SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
8640   SDValue Op0 = Node->getOperand(0);
8641   SDValue Op1 = Node->getOperand(1);
8642   EVT VT = Op0.getValueType();
8643   unsigned Opcode = Node->getOpcode();
8644   SDLoc DL(Node);
8645 
8646   // umin(x,y) -> sub(x,usubsat(x,y))
8647   if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
8648       isOperationLegal(ISD::USUBSAT, VT)) {
8649     return DAG.getNode(ISD::SUB, DL, VT, Op0,
8650                        DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
8651   }
8652 
8653   // umax(x,y) -> add(x,usubsat(y,x))
8654   if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
8655       isOperationLegal(ISD::USUBSAT, VT)) {
8656     return DAG.getNode(ISD::ADD, DL, VT, Op0,
8657                        DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
8658   }
8659 
8660   // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
8661   ISD::CondCode CC;
8662   switch (Opcode) {
8663   default: llvm_unreachable("How did we get here?");
8664   case ISD::SMAX: CC = ISD::SETGT; break;
8665   case ISD::SMIN: CC = ISD::SETLT; break;
8666   case ISD::UMAX: CC = ISD::SETUGT; break;
8667   case ISD::UMIN: CC = ISD::SETULT; break;
8668   }
8669 
8670   // FIXME: Should really try to split the vector in case it's legal on a
8671   // subvector.
8672   if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8673     return DAG.UnrollVectorOp(Node);
8674 
8675   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8676   SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
8677   return DAG.getSelect(DL, VT, Cond, Op0, Op1);
8678 }
8679 
// Expand [US][ADD|SUB]SAT into either a min/max-based form or the matching
// overflow-detecting arithmetic node plus a select on the overflow bit.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Otherwise expand via the overflow-reporting node for this operation.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // With all-ones booleans the overflow flag sign-extends to a mask, so
      // saturating to all-ones is a single OR:
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Same mask trick for unsigned subtract, saturating to zero:
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  // Signed case: on overflow, saturate toward the sign of the true result.
  // The sign bit of SumDiff is the inverse of the true result's sign when
  // overflow occurred, so (SumDiff >>s BW-1) ^ SignedMin yields SatMax for a
  // positive true result and SatMin for a negative one.
  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
8764 
8765 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
8766   unsigned Opcode = Node->getOpcode();
8767   bool IsSigned = Opcode == ISD::SSHLSAT;
8768   SDValue LHS = Node->getOperand(0);
8769   SDValue RHS = Node->getOperand(1);
8770   EVT VT = LHS.getValueType();
8771   SDLoc dl(Node);
8772 
8773   assert((Node->getOpcode() == ISD::SSHLSAT ||
8774           Node->getOpcode() == ISD::USHLSAT) &&
8775           "Expected a SHLSAT opcode");
8776   assert(VT == RHS.getValueType() && "Expected operands to be the same type");
8777   assert(VT.isInteger() && "Expected operands to be integers");
8778 
8779   // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
8780 
8781   unsigned BW = VT.getScalarSizeInBits();
8782   SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
8783   SDValue Orig =
8784       DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
8785 
8786   SDValue SatVal;
8787   if (IsSigned) {
8788     SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
8789     SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
8790     SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT),
8791                              SatMin, SatMax, ISD::SETLT);
8792   } else {
8793     SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
8794   }
8795   Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE);
8796 
8797   return Result;
8798 }
8799 
// Expand [US]MULFIX[SAT]: multiply the two scaled operands, take the
// double-width product via *MUL_LOHI/MULH*, shift right by the scale, and
// (for the SAT variants) clamp when the discarded high bits show overflow.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  // Scale 0 degenerates to a plain (possibly saturating) multiply.
  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // [us]mul.fix.sat(a, b, 0) via SMULO: select the saturation constant
      // matching the true product's sign when overflow is reported.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // Unsigned saturation always clamps to the all-ones maximum.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    // No LOHI node; synthesize the halves from MUL + MULH.
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    // Let vector legalization try splitting/unrolling instead.
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With no fractional bits every bit of Lo is result; overflow occurred
    // iff Hi differs from Lo's sign extension.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
8942 
// Expand [US]DIVFIX[SAT] by pre-shifting the operands so a plain integer
// division produces a result with the requested fixed-point scale. Returns
// an empty SDValue when there is not enough headroom to do so in this type.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Consume LHS headroom first; whatever scale is left comes out of the RHS.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getConstant(LHSShift, dl, ShiftTy));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getConstant(RHSShift, dl, ShiftTy));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    // Quotient is negative exactly when the operand signs differ; round
    // toward -inf by subtracting 1 when (Rem != 0) && (sign(LHS)^sign(RHS)).
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
9029 
9030 void TargetLowering::expandUADDSUBO(
9031     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
9032   SDLoc dl(Node);
9033   SDValue LHS = Node->getOperand(0);
9034   SDValue RHS = Node->getOperand(1);
9035   bool IsAdd = Node->getOpcode() == ISD::UADDO;
9036 
9037   // If ADD/SUBCARRY is legal, use that instead.
9038   unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
9039   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
9040     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
9041     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
9042                                     { LHS, RHS, CarryIn });
9043     Result = SDValue(NodeCarry.getNode(), 0);
9044     Overflow = SDValue(NodeCarry.getNode(), 1);
9045     return;
9046   }
9047 
9048   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
9049                             LHS.getValueType(), LHS, RHS);
9050 
9051   EVT ResultType = Node->getValueType(1);
9052   EVT SetCCType = getSetCCResultType(
9053       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
9054   SDValue SetCC;
9055   if (IsAdd && isOneConstant(RHS)) {
9056     // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
9057     // the live range of X. We assume comparing with 0 is cheap.
9058     // The general case (X + C) < C is not necessarily beneficial. Although we
9059     // reduce the live range of X, we may introduce the materialization of
9060     // constant C.
9061     SetCC =
9062         DAG.getSetCC(dl, SetCCType, Result,
9063                      DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
9064   } else {
9065     ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
9066     SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
9067   }
9068   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
9069 }
9070 
9071 void TargetLowering::expandSADDSUBO(
9072     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
9073   SDLoc dl(Node);
9074   SDValue LHS = Node->getOperand(0);
9075   SDValue RHS = Node->getOperand(1);
9076   bool IsAdd = Node->getOpcode() == ISD::SADDO;
9077 
9078   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
9079                             LHS.getValueType(), LHS, RHS);
9080 
9081   EVT ResultType = Node->getValueType(1);
9082   EVT OType = getSetCCResultType(
9083       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
9084 
9085   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9086   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
9087   if (isOperationLegal(OpcSat, LHS.getValueType())) {
9088     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
9089     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
9090     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
9091     return;
9092   }
9093 
9094   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
9095 
9096   // For an addition, the result should be less than one of the operands (LHS)
9097   // if and only if the other operand (RHS) is negative, otherwise there will
9098   // be overflow.
9099   // For a subtraction, the result should be less than one of the operands
9100   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9101   // otherwise there will be overflow.
9102   SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
9103   SDValue ConditionRHS =
9104       DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
9105 
9106   Overflow = DAG.getBoolExtOrTrunc(
9107       DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
9108       ResultType, ResultType);
9109 }
9110 
// Expand [SU]MULO: compute the full product (via MULH*, *MUL_LOHI, a widened
// MUL, or as a last resort a runtime libcall) and derive the overflow flag
// from the top half. Returns false if no expansion is possible here.
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting back fails to reproduce LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // Double-width type used for the widened-multiply and libcall fallbacks.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Indexed by isSigned: {high-half mul, lo/hi pair mul, widening extend}.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Multiply in the wide type and split the product into halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of low
      // part.
      unsigned LoSize = VT.getFixedSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
        HiLHS = DAG.getConstant(0, dl, VT);
        HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus has to be split to
    // two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half differs from the sign extension of
    // the bottom half.
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff any bit of the top half is set.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
9254 
9255 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
9256   SDLoc dl(Node);
9257   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
9258   SDValue Op = Node->getOperand(0);
9259   EVT VT = Op.getValueType();
9260 
9261   if (VT.isScalableVector())
9262     report_fatal_error(
9263         "Expanding reductions for scalable vectors is undefined.");
9264 
9265   // Try to use a shuffle reduction for power of two vectors.
9266   if (VT.isPow2VectorType()) {
9267     while (VT.getVectorNumElements() > 1) {
9268       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
9269       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
9270         break;
9271 
9272       SDValue Lo, Hi;
9273       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
9274       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
9275       VT = HalfVT;
9276     }
9277   }
9278 
9279   EVT EltVT = VT.getVectorElementType();
9280   unsigned NumElts = VT.getVectorNumElements();
9281 
9282   SmallVector<SDValue, 8> Ops;
9283   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
9284 
9285   SDValue Res = Ops[0];
9286   for (unsigned i = 1; i < NumElts; i++)
9287     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
9288 
9289   // Result type may be wider than element type.
9290   if (EltVT != Node->getValueType(0))
9291     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
9292   return Res;
9293 }
9294 
9295 SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
9296   SDLoc dl(Node);
9297   SDValue AccOp = Node->getOperand(0);
9298   SDValue VecOp = Node->getOperand(1);
9299   SDNodeFlags Flags = Node->getFlags();
9300 
9301   EVT VT = VecOp.getValueType();
9302   EVT EltVT = VT.getVectorElementType();
9303 
9304   if (VT.isScalableVector())
9305     report_fatal_error(
9306         "Expanding reductions for scalable vectors is undefined.");
9307 
9308   unsigned NumElts = VT.getVectorNumElements();
9309 
9310   SmallVector<SDValue, 8> Ops;
9311   DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
9312 
9313   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
9314 
9315   SDValue Res = AccOp;
9316   for (unsigned i = 0; i < NumElts; i++)
9317     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
9318 
9319   return Res;
9320 }
9321 
9322 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
9323                                SelectionDAG &DAG) const {
9324   EVT VT = Node->getValueType(0);
9325   SDLoc dl(Node);
9326   bool isSigned = Node->getOpcode() == ISD::SREM;
9327   unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
9328   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
9329   SDValue Dividend = Node->getOperand(0);
9330   SDValue Divisor = Node->getOperand(1);
9331   if (isOperationLegalOrCustom(DivRemOpc, VT)) {
9332     SDVTList VTs = DAG.getVTList(VT, VT);
9333     Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
9334     return true;
9335   }
9336   if (isOperationLegalOrCustom(DivOpc, VT)) {
9337     // X % Y -> X-X/Y*Y
9338     SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
9339     SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
9340     Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
9341     return true;
9342   }
9343   return false;
9344 }
9345 
/// Expand FP_TO_SINT_SAT / FP_TO_UINT_SAT into target-independent nodes:
/// saturate out-of-range inputs to the min/max integer representable in
/// SatVT and map NaN to zero. Two strategies are used: clamp the float via
/// FMINNUM/FMAXNUM and convert (when the bounds are exactly representable
/// and those ops are legal), otherwise convert first and patch the result
/// with compare+select chains.
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  // Operand 1 carries the saturation width as a VT node.
  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
  // libcall emission cannot handle this. Large result types will fail.
  // Note: this promotion happens BEFORE the FP bounds are materialized so
  // they are built in the widened source semantics.
  if (SrcVT == MVT::f16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  // Round toward zero so an inexact conversion still yields a bound that is
  // inside the representable integer range.
  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
                           ISD::CondCode::SETUO);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
                           ISD::CondCode::SETULT);
  // If Src OGT MaxFloat, select MaxInt.
  Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
                           ISD::CondCode::SETOGT);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
9448 
/// Expand ISD::VECTOR_SPLICE for scalable vectors by spilling both source
/// vectors to a stack temporary laid out as CONCAT_VECTORS(V1, V2) and then
/// loading the result from an offset pointer into that buffer.
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  // Operand 2 is the splice index; negative values count from the end of V1.
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // Stack slot big enough for both vectors back-to-back.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  // sizeof(V1) is scalable, so scale the known-minimum size by vscale.
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  // Negative Imm: take the last -Imm elements of V1 followed by elements
  // of V2.
  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  if (TrailingElts > VT.getVectorMinNumElements()) {
    // Clamp against the runtime size of one vector (vscale-scaled).
    SDValue VLBytes = DAG.getVScale(
        DL, PtrVT,
        APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
9521 
9522 bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
9523                                            SDValue &LHS, SDValue &RHS,
9524                                            SDValue &CC, SDValue Mask,
9525                                            SDValue EVL, bool &NeedInvert,
9526                                            const SDLoc &dl, SDValue &Chain,
9527                                            bool IsSignaling) const {
9528   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9529   MVT OpVT = LHS.getSimpleValueType();
9530   ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
9531   NeedInvert = false;
9532   assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
9533   bool IsNonVP = !EVL;
9534   switch (TLI.getCondCodeAction(CCCode, OpVT)) {
9535   default:
9536     llvm_unreachable("Unknown condition code action!");
9537   case TargetLowering::Legal:
9538     // Nothing to do.
9539     break;
9540   case TargetLowering::Expand: {
9541     ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
9542     if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
9543       std::swap(LHS, RHS);
9544       CC = DAG.getCondCode(InvCC);
9545       return true;
9546     }
9547     // Swapping operands didn't work. Try inverting the condition.
9548     bool NeedSwap = false;
9549     InvCC = getSetCCInverse(CCCode, OpVT);
9550     if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
9551       // If inverting the condition is not enough, try swapping operands
9552       // on top of it.
9553       InvCC = ISD::getSetCCSwappedOperands(InvCC);
9554       NeedSwap = true;
9555     }
9556     if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
9557       CC = DAG.getCondCode(InvCC);
9558       NeedInvert = true;
9559       if (NeedSwap)
9560         std::swap(LHS, RHS);
9561       return true;
9562     }
9563 
9564     ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
9565     unsigned Opc = 0;
9566     switch (CCCode) {
9567     default:
9568       llvm_unreachable("Don't know how to expand this condition!");
9569     case ISD::SETUO:
9570       if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
9571         CC1 = ISD::SETUNE;
9572         CC2 = ISD::SETUNE;
9573         Opc = ISD::OR;
9574         break;
9575       }
9576       assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
9577              "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
9578       NeedInvert = true;
9579       LLVM_FALLTHROUGH;
9580     case ISD::SETO:
9581       assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
9582              "If SETO is expanded, SETOEQ must be legal!");
9583       CC1 = ISD::SETOEQ;
9584       CC2 = ISD::SETOEQ;
9585       Opc = ISD::AND;
9586       break;
9587     case ISD::SETONE:
9588     case ISD::SETUEQ:
9589       // If the SETUO or SETO CC isn't legal, we might be able to use
9590       // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
9591       // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
9592       // the operands.
9593       CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
9594       if (!TLI.isCondCodeLegal(CC2, OpVT) &&
9595           (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
9596            TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
9597         CC1 = ISD::SETOGT;
9598         CC2 = ISD::SETOLT;
9599         Opc = ISD::OR;
9600         NeedInvert = ((unsigned)CCCode & 0x8U);
9601         break;
9602       }
9603       LLVM_FALLTHROUGH;
9604     case ISD::SETOEQ:
9605     case ISD::SETOGT:
9606     case ISD::SETOGE:
9607     case ISD::SETOLT:
9608     case ISD::SETOLE:
9609     case ISD::SETUNE:
9610     case ISD::SETUGT:
9611     case ISD::SETUGE:
9612     case ISD::SETULT:
9613     case ISD::SETULE:
9614       // If we are floating point, assign and break, otherwise fall through.
9615       if (!OpVT.isInteger()) {
9616         // We can use the 4th bit to tell if we are the unordered
9617         // or ordered version of the opcode.
9618         CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
9619         Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
9620         CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
9621         break;
9622       }
9623       // Fallthrough if we are unsigned integer.
9624       LLVM_FALLTHROUGH;
9625     case ISD::SETLE:
9626     case ISD::SETGT:
9627     case ISD::SETGE:
9628     case ISD::SETLT:
9629     case ISD::SETNE:
9630     case ISD::SETEQ:
9631       // If all combinations of inverting the condition and swapping operands
9632       // didn't work then we have no means to expand the condition.
9633       llvm_unreachable("Don't know how to expand this condition!");
9634     }
9635 
9636     SDValue SetCC1, SetCC2;
9637     if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
9638       // If we aren't the ordered or unorder operation,
9639       // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
9640       if (IsNonVP) {
9641         SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
9642         SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
9643       } else {
9644         SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
9645         SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
9646       }
9647     } else {
9648       // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
9649       if (IsNonVP) {
9650         SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
9651         SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
9652       } else {
9653         SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
9654         SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
9655       }
9656     }
9657     if (Chain)
9658       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
9659                           SetCC2.getValue(1));
9660     if (IsNonVP)
9661       LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
9662     else {
9663       // Transform the binary opcode to the VP equivalent.
9664       assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
9665       Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
9666       LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
9667     }
9668     RHS = SDValue();
9669     CC = SDValue();
9670     return true;
9671   }
9672   }
9673   return false;
9674 }
9675