1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/CodeGen/CallingConvLower.h"
16 #include "llvm/CodeGen/MachineFrameInfo.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineJumpTableInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/SelectionDAG.h"
21 #include "llvm/CodeGen/TargetRegisterInfo.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalVariable.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCExpr.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/KnownBits.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetLoweringObjectFile.h"
33 #include "llvm/Target/TargetMachine.h"
34 #include <cctype>
35 using namespace llvm;
36 
/// NOTE: The TargetMachine owns TLOF (the TargetLoweringObjectFile); this
/// constructor only forwards the machine to the base class and adds no state.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
40 
/// Return a human-readable name for a target-specific DAG node opcode, or
/// null. This default implementation knows no target nodes; targets with
/// custom ISD opcodes override it.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
44 
/// Return true if the owning TargetMachine was configured to emit
/// position-independent code. Thin forwarder; kept virtual-free here.
bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
48 
49 /// Check whether a given call node is in tail position within its function. If
50 /// so, it sets Chain to the input chain of the tail call.
51 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
52                                           SDValue &Chain) const {
53   const Function &F = DAG.getMachineFunction().getFunction();
54 
55   // First, check if tail calls have been disabled in this function.
56   if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
57     return false;
58 
59   // Conservatively require the attributes of the call to match those of
60   // the return. Ignore following attributes because they don't affect the
61   // call sequence.
62   AttrBuilder CallerAttrs(F.getAttributes(), AttributeList::ReturnIndex);
63   for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
64                            Attribute::DereferenceableOrNull, Attribute::NoAlias,
65                            Attribute::NonNull})
66     CallerAttrs.removeAttribute(Attr);
67 
68   if (CallerAttrs.hasAttributes())
69     return false;
70 
71   // It's not safe to eliminate the sign / zero extension of the return value.
72   if (CallerAttrs.contains(Attribute::ZExt) ||
73       CallerAttrs.contains(Attribute::SExt))
74     return false;
75 
76   // Check if the only use is a function return node.
77   return isUsedByReturnOnly(Node, Chain);
78 }
79 
80 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
81     const uint32_t *CallerPreservedMask,
82     const SmallVectorImpl<CCValAssign> &ArgLocs,
83     const SmallVectorImpl<SDValue> &OutVals) const {
84   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
85     const CCValAssign &ArgLoc = ArgLocs[I];
86     if (!ArgLoc.isRegLoc())
87       continue;
88     MCRegister Reg = ArgLoc.getLocReg();
89     // Only look at callee saved registers.
90     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
91       continue;
92     // Check that we pass the value used for the caller.
93     // (We look for a CopyFromReg reading a virtual register that is used
94     //  for the function live-in value of register Reg)
95     SDValue Value = OutVals[I];
96     if (Value->getOpcode() != ISD::CopyFromReg)
97       return false;
98     Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
99     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
100       return false;
101   }
102   return true;
103 }
104 
105 /// Set CallLoweringInfo attribute flags based on the call instruction's
106 /// argument attributes.
107 void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
108                                                      unsigned ArgIdx) {
109   auto Attrs = Call->getAttributes();
110 
111   IsSExt = Attrs.hasParamAttribute(ArgIdx, Attribute::SExt);
112   IsZExt = Attrs.hasParamAttribute(ArgIdx, Attribute::ZExt);
113   IsInReg = Attrs.hasParamAttribute(ArgIdx, Attribute::InReg);
114   IsSRet = Attrs.hasParamAttribute(ArgIdx, Attribute::StructRet);
115   IsNest = Attrs.hasParamAttribute(ArgIdx, Attribute::Nest);
116   IsReturned = Attrs.hasParamAttribute(ArgIdx, Attribute::Returned);
117   IsSwiftSelf = Attrs.hasParamAttribute(ArgIdx, Attribute::SwiftSelf);
118   IsSwiftAsync = Attrs.hasParamAttribute(ArgIdx, Attribute::SwiftAsync);
119   IsSwiftError = Attrs.hasParamAttribute(ArgIdx, Attribute::SwiftError);
120   Alignment = Attrs.getParamStackAlignment(ArgIdx);
121 
122   IsByVal = Attrs.hasParamAttribute(ArgIdx, Attribute::ByVal);
123   ByValType = nullptr;
124   if (IsByVal) {
125     ByValType = Call->getParamByValType(ArgIdx);
126     if (!Alignment)
127       Alignment = Call->getParamAlign(ArgIdx);
128   }
129   IsInAlloca = Attrs.hasParamAttribute(ArgIdx, Attribute::InAlloca);
130   IsPreallocated = Attrs.hasParamAttribute(ArgIdx, Attribute::Preallocated);
131   PreallocatedType = nullptr;
132   if (IsPreallocated)
133     PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
134 }
135 
136 /// Generate a libcall taking the given operands as arguments and returning a
137 /// result of type RetVT.
138 std::pair<SDValue, SDValue>
139 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
140                             ArrayRef<SDValue> Ops,
141                             MakeLibCallOptions CallOptions,
142                             const SDLoc &dl,
143                             SDValue InChain) const {
144   if (!InChain)
145     InChain = DAG.getEntryNode();
146 
147   TargetLowering::ArgListTy Args;
148   Args.reserve(Ops.size());
149 
150   TargetLowering::ArgListEntry Entry;
151   for (unsigned i = 0; i < Ops.size(); ++i) {
152     SDValue NewOp = Ops[i];
153     Entry.Node = NewOp;
154     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
155     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
156                                                  CallOptions.IsSExt);
157     Entry.IsZExt = !Entry.IsSExt;
158 
159     if (CallOptions.IsSoften &&
160         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
161       Entry.IsSExt = Entry.IsZExt = false;
162     }
163     Args.push_back(Entry);
164   }
165 
166   if (LC == RTLIB::UNKNOWN_LIBCALL)
167     report_fatal_error("Unsupported library call operation!");
168   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
169                                          getPointerTy(DAG.getDataLayout()));
170 
171   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
172   TargetLowering::CallLoweringInfo CLI(DAG);
173   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
174   bool zeroExtend = !signExtend;
175 
176   if (CallOptions.IsSoften &&
177       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
178     signExtend = zeroExtend = false;
179   }
180 
181   CLI.setDebugLoc(dl)
182       .setChain(InChain)
183       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
184       .setNoReturn(CallOptions.DoesNotReturn)
185       .setDiscardResult(!CallOptions.IsReturnValueUsed)
186       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
187       .setSExtResult(signExtend)
188       .setZExtResult(zeroExtend);
189   return LowerCallTo(CLI);
190 }
191 
/// Determine the sequence of value types to use to lower the memory operation
/// described by Op (memcpy / memmove / memset), appending one EVT per
/// load/store pair to MemOps. Returns false if more than Limit operations
/// would be required, or if the alignment constraints cannot be satisfied.
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // A memcpy with a fixed destination alignment is rejected when the source
  // is less aligned than the destination.
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type first; MVT::Other means "no
  // preference", in which case we choose an integer type below.
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      // Step down through the simple integer types (i64 -> i32 -> ...) until
      // one is aligned enough or misaligned access is allowed.
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    // Shrink VT until it fits in the remaining byte count.
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Fall back to stepping down through the integer types, stopping at
        // i8 (the smallest possible memory operation).
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        // Keep the wider VT and let it overlap the tail of the region.
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
280 
281 /// Soften the operands of a comparison. This code is shared among BR_CC,
282 /// SELECT_CC, and SETCC handlers.
283 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
284                                          SDValue &NewLHS, SDValue &NewRHS,
285                                          ISD::CondCode &CCCode,
286                                          const SDLoc &dl, const SDValue OldLHS,
287                                          const SDValue OldRHS) const {
288   SDValue Chain;
289   return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
290                              OldRHS, Chain);
291 }
292 
/// Soften the operands of a floating-point comparison by expanding it into
/// one or two soft-float comparison libcalls. On return, NewLHS/NewRHS and
/// CCCode describe an equivalent integer comparison; Chain is updated to the
/// output chain of the libcall(s).
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s). LC2 stays UNKNOWN_LIBCALL
  // unless the predicate needs two calls combined (SETONE / SETUEQ below).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of SETUO; use the unordered call and invert.
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUEQ:
    // SETUEQ = UO || OEQ: needs two libcalls combined below.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparisons lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  // First libcall: result is compared against zero with the libcall's CC.
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-call predicate: evaluate both comparisons and combine the results
    // with AND (inverted case, SETONE) or OR (SETUEQ).
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    // Merge the chains of both libcalls when the caller threads a chain.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    // Signal the caller that the result is a bare boolean, not an LHS/RHS
    // pair to compare.
    NewRHS = SDValue();
  }
}
432 
433 /// Return the entry encoding for a jump table in the current function. The
434 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
435 unsigned TargetLowering::getJumpTableEncoding() const {
436   // In non-pic modes, just use the address of a block.
437   if (!isPositionIndependent())
438     return MachineJumpTableInfo::EK_BlockAddress;
439 
440   // In PIC mode, if the target supports a GPRel32 directive, use it.
441   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
442     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
443 
444   // Otherwise, use a label difference.
445   return MachineJumpTableInfo::EK_LabelDifference32;
446 }
447 
448 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
449                                                  SelectionDAG &DAG) const {
450   // If our PIC model is GP relative, use the global offset table as the base.
451   unsigned JTEncoding = getJumpTableEncoding();
452 
453   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
454       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
455     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
456 
457   return Table;
458 }
459 
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr. Targets with a different PIC
/// base convention override this.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,MCContext &Ctx) const{
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
468 
469 bool
470 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
471   const TargetMachine &TM = getTargetMachine();
472   const GlobalValue *GV = GA->getGlobal();
473 
474   // If the address is not even local to this DSO we will have to load it from
475   // a got and then add the offset.
476   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
477     return false;
478 
479   // If the code is position independent we will have to add a base register.
480   if (isPositionIndependent())
481     return false;
482 
483   // Otherwise we can do it.
484   return true;
485 }
486 
487 //===----------------------------------------------------------------------===//
488 //  Optimization Methods
489 //===----------------------------------------------------------------------===//
490 
491 /// If the specified instruction has a constant integer operand and there are
492 /// bits set in that constant that are not demanded, then clear those bits and
493 /// return true.
494 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
495                                             const APInt &DemandedBits,
496                                             const APInt &DemandedElts,
497                                             TargetLoweringOpt &TLO) const {
498   SDLoc DL(Op);
499   unsigned Opcode = Op.getOpcode();
500 
501   // Do target-specific constant optimization.
502   if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
503     return TLO.New.getNode();
504 
505   // FIXME: ISD::SELECT, ISD::SELECT_CC
506   switch (Opcode) {
507   default:
508     break;
509   case ISD::XOR:
510   case ISD::AND:
511   case ISD::OR: {
512     auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
513     if (!Op1C)
514       return false;
515 
516     // If this is a 'not' op, don't touch it because that's a canonical form.
517     const APInt &C = Op1C->getAPIntValue();
518     if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
519       return false;
520 
521     if (!C.isSubsetOf(DemandedBits)) {
522       EVT VT = Op.getValueType();
523       SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
524       SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
525       return TLO.CombineTo(Op, NewOp);
526     }
527 
528     break;
529   }
530   }
531 
532   return false;
533 }
534 
535 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
536                                             const APInt &DemandedBits,
537                                             TargetLoweringOpt &TLO) const {
538   EVT VT = Op.getValueType();
539   APInt DemandedElts = VT.isVector()
540                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
541                            : APInt(1, 1);
542   return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
543 }
544 
545 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
546 /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
547 /// generalized for targets with other types of implicit widening casts.
548 bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
549                                       const APInt &Demanded,
550                                       TargetLoweringOpt &TLO) const {
551   assert(Op.getNumOperands() == 2 &&
552          "ShrinkDemandedOp only supports binary operators!");
553   assert(Op.getNode()->getNumValues() == 1 &&
554          "ShrinkDemandedOp only supports nodes with one result!");
555 
556   SelectionDAG &DAG = TLO.DAG;
557   SDLoc dl(Op);
558 
559   // Early return, as this function cannot handle vector types.
560   if (Op.getValueType().isVector())
561     return false;
562 
563   // Don't do this if the node has another user, which may require the
564   // full value.
565   if (!Op.getNode()->hasOneUse())
566     return false;
567 
568   // Search for the smallest integer type with free casts to and from
569   // Op's type. For expedience, just check power-of-2 integer types.
570   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
571   unsigned DemandedSize = Demanded.getActiveBits();
572   unsigned SmallVTBits = DemandedSize;
573   if (!isPowerOf2_32(SmallVTBits))
574     SmallVTBits = NextPowerOf2(SmallVTBits);
575   for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
576     EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
577     if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
578         TLI.isZExtFree(SmallVT, Op.getValueType())) {
579       // We found a type with free casts.
580       SDValue X = DAG.getNode(
581           Op.getOpcode(), dl, SmallVT,
582           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
583           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
584       assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
585       SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
586       return TLO.CombineTo(Op, Z);
587     }
588   }
589   return false;
590 }
591 
592 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
593                                           DAGCombinerInfo &DCI) const {
594   SelectionDAG &DAG = DCI.DAG;
595   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
596                         !DCI.isBeforeLegalizeOps());
597   KnownBits Known;
598 
599   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
600   if (Simplified) {
601     DCI.AddToWorklist(Op.getNode());
602     DCI.CommitTargetLoweringOpt(TLO);
603   }
604   return Simplified;
605 }
606 
607 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
608                                           KnownBits &Known,
609                                           TargetLoweringOpt &TLO,
610                                           unsigned Depth,
611                                           bool AssumeSingleUse) const {
612   EVT VT = Op.getValueType();
613 
614   // TODO: We can probably do more work on calculating the known bits and
615   // simplifying the operations for scalable vectors, but for now we just
616   // bail out.
617   if (VT.isScalableVector()) {
618     // Pretend we don't know anything for now.
619     Known = KnownBits(DemandedBits.getBitWidth());
620     return false;
621   }
622 
623   APInt DemandedElts = VT.isVector()
624                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
625                            : APInt(1, 1);
626   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
627                               AssumeSingleUse);
628 }
629 
630 // TODO: Can we merge SelectionDAG::GetDemandedBits into this?
631 // TODO: Under what circumstances can we create nodes? Constant folding?
632 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
633     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
634     SelectionDAG &DAG, unsigned Depth) const {
635   // Limit search depth.
636   if (Depth >= SelectionDAG::MaxRecursionDepth)
637     return SDValue();
638 
639   // Ignore UNDEFs.
640   if (Op.isUndef())
641     return SDValue();
642 
643   // Not demanding any bits/elts from Op.
644   if (DemandedBits == 0 || DemandedElts == 0)
645     return DAG.getUNDEF(Op.getValueType());
646 
647   unsigned NumElts = DemandedElts.getBitWidth();
648   unsigned BitWidth = DemandedBits.getBitWidth();
649   KnownBits LHSKnown, RHSKnown;
650   switch (Op.getOpcode()) {
651   case ISD::BITCAST: {
652     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
653     EVT SrcVT = Src.getValueType();
654     EVT DstVT = Op.getValueType();
655     if (SrcVT == DstVT)
656       return Src;
657 
658     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
659     unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
660     if (NumSrcEltBits == NumDstEltBits)
661       if (SDValue V = SimplifyMultipleUseDemandedBits(
662               Src, DemandedBits, DemandedElts, DAG, Depth + 1))
663         return DAG.getBitcast(DstVT, V);
664 
665     // TODO - bigendian once we have test coverage.
666     if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
667         DAG.getDataLayout().isLittleEndian()) {
668       unsigned Scale = NumDstEltBits / NumSrcEltBits;
669       unsigned NumSrcElts = SrcVT.getVectorNumElements();
670       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
671       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
672       for (unsigned i = 0; i != Scale; ++i) {
673         unsigned Offset = i * NumSrcEltBits;
674         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
675         if (!Sub.isNullValue()) {
676           DemandedSrcBits |= Sub;
677           for (unsigned j = 0; j != NumElts; ++j)
678             if (DemandedElts[j])
679               DemandedSrcElts.setBit((j * Scale) + i);
680         }
681       }
682 
683       if (SDValue V = SimplifyMultipleUseDemandedBits(
684               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
685         return DAG.getBitcast(DstVT, V);
686     }
687 
688     // TODO - bigendian once we have test coverage.
689     if ((NumSrcEltBits % NumDstEltBits) == 0 &&
690         DAG.getDataLayout().isLittleEndian()) {
691       unsigned Scale = NumSrcEltBits / NumDstEltBits;
692       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
693       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
694       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
695       for (unsigned i = 0; i != NumElts; ++i)
696         if (DemandedElts[i]) {
697           unsigned Offset = (i % Scale) * NumDstEltBits;
698           DemandedSrcBits.insertBits(DemandedBits, Offset);
699           DemandedSrcElts.setBit(i / Scale);
700         }
701 
702       if (SDValue V = SimplifyMultipleUseDemandedBits(
703               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
704         return DAG.getBitcast(DstVT, V);
705     }
706 
707     break;
708   }
709   case ISD::AND: {
710     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
711     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
712 
713     // If all of the demanded bits are known 1 on one side, return the other.
714     // These bits cannot contribute to the result of the 'and' in this
715     // context.
716     if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
717       return Op.getOperand(0);
718     if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
719       return Op.getOperand(1);
720     break;
721   }
722   case ISD::OR: {
723     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
724     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
725 
726     // If all of the demanded bits are known zero on one side, return the
727     // other.  These bits cannot contribute to the result of the 'or' in this
728     // context.
729     if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
730       return Op.getOperand(0);
731     if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
732       return Op.getOperand(1);
733     break;
734   }
735   case ISD::XOR: {
736     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
737     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
738 
739     // If all of the demanded bits are known zero on one side, return the
740     // other.
741     if (DemandedBits.isSubsetOf(RHSKnown.Zero))
742       return Op.getOperand(0);
743     if (DemandedBits.isSubsetOf(LHSKnown.Zero))
744       return Op.getOperand(1);
745     break;
746   }
747   case ISD::SHL: {
748     // If we are only demanding sign bits then we can use the shift source
749     // directly.
750     if (const APInt *MaxSA =
751             DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
752       SDValue Op0 = Op.getOperand(0);
753       unsigned ShAmt = MaxSA->getZExtValue();
754       unsigned NumSignBits =
755           DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
756       unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
757       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
758         return Op0;
759     }
760     break;
761   }
762   case ISD::SETCC: {
763     SDValue Op0 = Op.getOperand(0);
764     SDValue Op1 = Op.getOperand(1);
765     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
766     // If (1) we only need the sign-bit, (2) the setcc operands are the same
767     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
768     // -1, we may be able to bypass the setcc.
769     if (DemandedBits.isSignMask() &&
770         Op0.getScalarValueSizeInBits() == BitWidth &&
771         getBooleanContents(Op0.getValueType()) ==
772             BooleanContent::ZeroOrNegativeOneBooleanContent) {
773       // If we're testing X < 0, then this compare isn't needed - just use X!
774       // FIXME: We're limiting to integer types here, but this should also work
775       // if we don't care about FP signed-zero. The use of SETLT with FP means
776       // that we don't care about NaNs.
777       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
778           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
779         return Op0;
780     }
781     break;
782   }
783   case ISD::SIGN_EXTEND_INREG: {
784     // If none of the extended bits are demanded, eliminate the sextinreg.
785     SDValue Op0 = Op.getOperand(0);
786     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
787     unsigned ExBits = ExVT.getScalarSizeInBits();
788     if (DemandedBits.getActiveBits() <= ExBits)
789       return Op0;
790     // If the input is already sign extended, just drop the extension.
791     unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
792     if (NumSignBits >= (BitWidth - ExBits + 1))
793       return Op0;
794     break;
795   }
796   case ISD::ANY_EXTEND_VECTOR_INREG:
797   case ISD::SIGN_EXTEND_VECTOR_INREG:
798   case ISD::ZERO_EXTEND_VECTOR_INREG: {
799     // If we only want the lowest element and none of extended bits, then we can
800     // return the bitcasted source vector.
801     SDValue Src = Op.getOperand(0);
802     EVT SrcVT = Src.getValueType();
803     EVT DstVT = Op.getValueType();
804     if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
805         DAG.getDataLayout().isLittleEndian() &&
806         DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
807       return DAG.getBitcast(DstVT, Src);
808     }
809     break;
810   }
811   case ISD::INSERT_VECTOR_ELT: {
812     // If we don't demand the inserted element, return the base vector.
813     SDValue Vec = Op.getOperand(0);
814     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
815     EVT VecVT = Vec.getValueType();
816     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
817         !DemandedElts[CIdx->getZExtValue()])
818       return Vec;
819     break;
820   }
821   case ISD::INSERT_SUBVECTOR: {
822     // If we don't demand the inserted subvector, return the base vector.
823     SDValue Vec = Op.getOperand(0);
824     SDValue Sub = Op.getOperand(1);
825     uint64_t Idx = Op.getConstantOperandVal(2);
826     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
827     if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
828       return Vec;
829     break;
830   }
831   case ISD::VECTOR_SHUFFLE: {
832     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
833 
834     // If all the demanded elts are from one operand and are inline,
835     // then we can use the operand directly.
836     bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
837     for (unsigned i = 0; i != NumElts; ++i) {
838       int M = ShuffleMask[i];
839       if (M < 0 || !DemandedElts[i])
840         continue;
841       AllUndef = false;
842       IdentityLHS &= (M == (int)i);
843       IdentityRHS &= ((M - NumElts) == i);
844     }
845 
846     if (AllUndef)
847       return DAG.getUNDEF(Op.getValueType());
848     if (IdentityLHS)
849       return Op.getOperand(0);
850     if (IdentityRHS)
851       return Op.getOperand(1);
852     break;
853   }
854   default:
855     if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
856       if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
857               Op, DemandedBits, DemandedElts, DAG, Depth))
858         return V;
859     break;
860   }
861   return SDValue();
862 }
863 
864 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
865     SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
866     unsigned Depth) const {
867   EVT VT = Op.getValueType();
868   APInt DemandedElts = VT.isVector()
869                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
870                            : APInt(1, 1);
871   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
872                                          Depth);
873 }
874 
875 SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
876     SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
877     unsigned Depth) const {
878   APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
879   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
880                                          Depth);
881 }
882 
883 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
884 /// result of Op are ever used downstream. If we can use this information to
885 /// simplify Op, create a new simplified DAG node and return true, returning the
886 /// original and new nodes in Old and New. Otherwise, analyze the expression and
887 /// return a mask of Known bits for the expression (used to simplify the
888 /// caller).  The Known bits may only be accurate for those bits in the
889 /// OriginalDemandedBits and OriginalDemandedElts.
890 bool TargetLowering::SimplifyDemandedBits(
891     SDValue Op, const APInt &OriginalDemandedBits,
892     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
893     unsigned Depth, bool AssumeSingleUse) const {
894   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
895   assert(Op.getScalarValueSizeInBits() == BitWidth &&
896          "Mask size mismatches value type size!");
897 
898   // Don't know anything.
899   Known = KnownBits(BitWidth);
900 
901   // TODO: We can probably do more work on calculating the known bits and
902   // simplifying the operations for scalable vectors, but for now we just
903   // bail out.
904   if (Op.getValueType().isScalableVector())
905     return false;
906 
907   unsigned NumElts = OriginalDemandedElts.getBitWidth();
908   assert((!Op.getValueType().isVector() ||
909           NumElts == Op.getValueType().getVectorNumElements()) &&
910          "Unexpected vector size");
911 
912   APInt DemandedBits = OriginalDemandedBits;
913   APInt DemandedElts = OriginalDemandedElts;
914   SDLoc dl(Op);
915   auto &DL = TLO.DAG.getDataLayout();
916 
917   // Undef operand.
918   if (Op.isUndef())
919     return false;
920 
921   if (Op.getOpcode() == ISD::Constant) {
922     // We know all of the bits for a constant!
923     Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
924     return false;
925   }
926 
927   if (Op.getOpcode() == ISD::ConstantFP) {
928     // We know all of the bits for a floating point constant!
929     Known = KnownBits::makeConstant(
930         cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
931     return false;
932   }
933 
934   // Other users may use these bits.
935   EVT VT = Op.getValueType();
936   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
937     if (Depth != 0) {
938       // If not at the root, Just compute the Known bits to
939       // simplify things downstream.
940       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
941       return false;
942     }
943     // If this is the root being simplified, allow it to have multiple uses,
944     // just set the DemandedBits/Elts to all bits.
945     DemandedBits = APInt::getAllOnesValue(BitWidth);
946     DemandedElts = APInt::getAllOnesValue(NumElts);
947   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
948     // Not demanding any bits/elts from Op.
949     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
950   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
951     // Limit search depth.
952     return false;
953   }
954 
955   KnownBits Known2;
956   switch (Op.getOpcode()) {
957   case ISD::TargetConstant:
958     llvm_unreachable("Can't simplify this node");
959   case ISD::SCALAR_TO_VECTOR: {
960     if (!DemandedElts[0])
961       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
962 
963     KnownBits SrcKnown;
964     SDValue Src = Op.getOperand(0);
965     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
966     APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
967     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
968       return true;
969 
970     // Upper elements are undef, so only get the knownbits if we just demand
971     // the bottom element.
972     if (DemandedElts == 1)
973       Known = SrcKnown.anyextOrTrunc(BitWidth);
974     break;
975   }
976   case ISD::BUILD_VECTOR:
977     // Collect the known bits that are shared by every demanded element.
978     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
979     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
980     return false; // Don't fall through, will infinitely loop.
981   case ISD::LOAD: {
982     auto *LD = cast<LoadSDNode>(Op);
983     if (getTargetConstantFromLoad(LD)) {
984       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
985       return false; // Don't fall through, will infinitely loop.
986     }
987     if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
988       // If this is a ZEXTLoad and we are looking at the loaded value.
989       EVT MemVT = LD->getMemoryVT();
990       unsigned MemBits = MemVT.getScalarSizeInBits();
991       Known.Zero.setBitsFrom(MemBits);
992       return false; // Don't fall through, will infinitely loop.
993     }
994     break;
995   }
996   case ISD::INSERT_VECTOR_ELT: {
997     SDValue Vec = Op.getOperand(0);
998     SDValue Scl = Op.getOperand(1);
999     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1000     EVT VecVT = Vec.getValueType();
1001 
1002     // If index isn't constant, assume we need all vector elements AND the
1003     // inserted element.
1004     APInt DemandedVecElts(DemandedElts);
1005     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1006       unsigned Idx = CIdx->getZExtValue();
1007       DemandedVecElts.clearBit(Idx);
1008 
1009       // Inserted element is not required.
1010       if (!DemandedElts[Idx])
1011         return TLO.CombineTo(Op, Vec);
1012     }
1013 
1014     KnownBits KnownScl;
1015     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1016     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1017     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1018       return true;
1019 
1020     Known = KnownScl.anyextOrTrunc(BitWidth);
1021 
1022     KnownBits KnownVec;
1023     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1024                              Depth + 1))
1025       return true;
1026 
1027     if (!!DemandedVecElts)
1028       Known = KnownBits::commonBits(Known, KnownVec);
1029 
1030     return false;
1031   }
1032   case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
1035     SDValue Src = Op.getOperand(0);
1036     SDValue Sub = Op.getOperand(1);
1037     uint64_t Idx = Op.getConstantOperandVal(2);
1038     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1039     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1040     APInt DemandedSrcElts = DemandedElts;
1041     DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
1042 
1043     KnownBits KnownSub, KnownSrc;
1044     if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1045                              Depth + 1))
1046       return true;
1047     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1048                              Depth + 1))
1049       return true;
1050 
1051     Known.Zero.setAllBits();
1052     Known.One.setAllBits();
1053     if (!!DemandedSubElts)
1054       Known = KnownBits::commonBits(Known, KnownSub);
1055     if (!!DemandedSrcElts)
1056       Known = KnownBits::commonBits(Known, KnownSrc);
1057 
1058     // Attempt to avoid multi-use src if we don't need anything from it.
1059     if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
1060         !DemandedSrcElts.isAllOnesValue()) {
1061       SDValue NewSub = SimplifyMultipleUseDemandedBits(
1062           Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1063       SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1064           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1065       if (NewSub || NewSrc) {
1066         NewSub = NewSub ? NewSub : Sub;
1067         NewSrc = NewSrc ? NewSrc : Src;
1068         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1069                                         Op.getOperand(2));
1070         return TLO.CombineTo(Op, NewOp);
1071       }
1072     }
1073     break;
1074   }
1075   case ISD::EXTRACT_SUBVECTOR: {
1076     // Offset the demanded elts by the subvector index.
1077     SDValue Src = Op.getOperand(0);
1078     if (Src.getValueType().isScalableVector())
1079       break;
1080     uint64_t Idx = Op.getConstantOperandVal(1);
1081     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1082     APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
1083 
1084     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1085                              Depth + 1))
1086       return true;
1087 
1088     // Attempt to avoid multi-use src if we don't need anything from it.
1089     if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
1090       SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1091           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1092       if (DemandedSrc) {
1093         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1094                                         Op.getOperand(1));
1095         return TLO.CombineTo(Op, NewOp);
1096       }
1097     }
1098     break;
1099   }
1100   case ISD::CONCAT_VECTORS: {
1101     Known.Zero.setAllBits();
1102     Known.One.setAllBits();
1103     EVT SubVT = Op.getOperand(0).getValueType();
1104     unsigned NumSubVecs = Op.getNumOperands();
1105     unsigned NumSubElts = SubVT.getVectorNumElements();
1106     for (unsigned i = 0; i != NumSubVecs; ++i) {
1107       APInt DemandedSubElts =
1108           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1109       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1110                                Known2, TLO, Depth + 1))
1111         return true;
1112       // Known bits are shared by every demanded subvector element.
1113       if (!!DemandedSubElts)
1114         Known = KnownBits::commonBits(Known, Known2);
1115     }
1116     break;
1117   }
1118   case ISD::VECTOR_SHUFFLE: {
1119     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1120 
    // Collect demanded elements from shuffle operands.
1122     APInt DemandedLHS(NumElts, 0);
1123     APInt DemandedRHS(NumElts, 0);
1124     for (unsigned i = 0; i != NumElts; ++i) {
1125       if (!DemandedElts[i])
1126         continue;
1127       int M = ShuffleMask[i];
1128       if (M < 0) {
1129         // For UNDEF elements, we don't know anything about the common state of
1130         // the shuffle result.
1131         DemandedLHS.clearAllBits();
1132         DemandedRHS.clearAllBits();
1133         break;
1134       }
1135       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
1136       if (M < (int)NumElts)
1137         DemandedLHS.setBit(M);
1138       else
1139         DemandedRHS.setBit(M - NumElts);
1140     }
1141 
1142     if (!!DemandedLHS || !!DemandedRHS) {
1143       SDValue Op0 = Op.getOperand(0);
1144       SDValue Op1 = Op.getOperand(1);
1145 
1146       Known.Zero.setAllBits();
1147       Known.One.setAllBits();
1148       if (!!DemandedLHS) {
1149         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1150                                  Depth + 1))
1151           return true;
1152         Known = KnownBits::commonBits(Known, Known2);
1153       }
1154       if (!!DemandedRHS) {
1155         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1156                                  Depth + 1))
1157           return true;
1158         Known = KnownBits::commonBits(Known, Known2);
1159       }
1160 
1161       // Attempt to avoid multi-use ops if we don't need anything from them.
1162       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1163           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1164       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1165           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1166       if (DemandedOp0 || DemandedOp1) {
1167         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1168         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1169         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1170         return TLO.CombineTo(Op, NewOp);
1171       }
1172     }
1173     break;
1174   }
1175   case ISD::AND: {
1176     SDValue Op0 = Op.getOperand(0);
1177     SDValue Op1 = Op.getOperand(1);
1178 
1179     // If the RHS is a constant, check to see if the LHS would be zero without
1180     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1181     // simplify the LHS, here we're using information from the LHS to simplify
1182     // the RHS.
1183     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1184       // Do not increment Depth here; that can cause an infinite loop.
1185       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1186       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1187       if ((LHSKnown.Zero & DemandedBits) ==
1188           (~RHSC->getAPIntValue() & DemandedBits))
1189         return TLO.CombineTo(Op, Op0);
1190 
1191       // If any of the set bits in the RHS are known zero on the LHS, shrink
1192       // the constant.
1193       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1194                                  DemandedElts, TLO))
1195         return true;
1196 
1197       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1198       // constant, but if this 'and' is only clearing bits that were just set by
1199       // the xor, then this 'and' can be eliminated by shrinking the mask of
1200       // the xor. For example, for a 32-bit X:
1201       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1202       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1203           LHSKnown.One == ~RHSC->getAPIntValue()) {
1204         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1205         return TLO.CombineTo(Op, Xor);
1206       }
1207     }
1208 
1209     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1210                              Depth + 1))
1211       return true;
1212     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1213     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1214                              Known2, TLO, Depth + 1))
1215       return true;
1216     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1217 
1218     // Attempt to avoid multi-use ops if we don't need anything from them.
1219     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1220       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1221           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1222       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1223           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1224       if (DemandedOp0 || DemandedOp1) {
1225         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1226         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1227         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1228         return TLO.CombineTo(Op, NewOp);
1229       }
1230     }
1231 
1232     // If all of the demanded bits are known one on one side, return the other.
1233     // These bits cannot contribute to the result of the 'and'.
1234     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1235       return TLO.CombineTo(Op, Op0);
1236     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1237       return TLO.CombineTo(Op, Op1);
1238     // If all of the demanded bits in the inputs are known zeros, return zero.
1239     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1240       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1241     // If the RHS is a constant, see if we can simplify it.
1242     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1243                                TLO))
1244       return true;
1245     // If the operation can be done in a smaller type, do so.
1246     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1247       return true;
1248 
1249     Known &= Known2;
1250     break;
1251   }
1252   case ISD::OR: {
1253     SDValue Op0 = Op.getOperand(0);
1254     SDValue Op1 = Op.getOperand(1);
1255 
1256     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1257                              Depth + 1))
1258       return true;
1259     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1260     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1261                              Known2, TLO, Depth + 1))
1262       return true;
1263     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1264 
1265     // Attempt to avoid multi-use ops if we don't need anything from them.
1266     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1267       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1268           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1269       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1270           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1271       if (DemandedOp0 || DemandedOp1) {
1272         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1273         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1274         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1275         return TLO.CombineTo(Op, NewOp);
1276       }
1277     }
1278 
1279     // If all of the demanded bits are known zero on one side, return the other.
1280     // These bits cannot contribute to the result of the 'or'.
1281     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1282       return TLO.CombineTo(Op, Op0);
1283     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1284       return TLO.CombineTo(Op, Op1);
1285     // If the RHS is a constant, see if we can simplify it.
1286     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1287       return true;
1288     // If the operation can be done in a smaller type, do so.
1289     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1290       return true;
1291 
1292     Known |= Known2;
1293     break;
1294   }
1295   case ISD::XOR: {
1296     SDValue Op0 = Op.getOperand(0);
1297     SDValue Op1 = Op.getOperand(1);
1298 
1299     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1300                              Depth + 1))
1301       return true;
1302     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1303     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1304                              Depth + 1))
1305       return true;
1306     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1307 
1308     // Attempt to avoid multi-use ops if we don't need anything from them.
1309     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1310       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1311           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1312       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1313           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1314       if (DemandedOp0 || DemandedOp1) {
1315         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1316         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1317         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1318         return TLO.CombineTo(Op, NewOp);
1319       }
1320     }
1321 
1322     // If all of the demanded bits are known zero on one side, return the other.
1323     // These bits cannot contribute to the result of the 'xor'.
1324     if (DemandedBits.isSubsetOf(Known.Zero))
1325       return TLO.CombineTo(Op, Op0);
1326     if (DemandedBits.isSubsetOf(Known2.Zero))
1327       return TLO.CombineTo(Op, Op1);
1328     // If the operation can be done in a smaller type, do so.
1329     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1330       return true;
1331 
1332     // If all of the unknown bits are known to be zero on one side or the other
1333     // turn this into an *inclusive* or.
1334     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1335     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1336       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1337 
1338     ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts);
1339     if (C) {
1340       // If one side is a constant, and all of the set bits in the constant are
1341       // also known set on the other side, turn this into an AND, as we know
1342       // the bits will be cleared.
1343       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1344       // NB: it is okay if more bits are known than are requested
1345       if (C->getAPIntValue() == Known2.One) {
1346         SDValue ANDC =
1347             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1348         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1349       }
1350 
1351       // If the RHS is a constant, see if we can change it. Don't alter a -1
1352       // constant because that's a 'not' op, and that is better for combining
1353       // and codegen.
1354       if (!C->isAllOnesValue() &&
1355           DemandedBits.isSubsetOf(C->getAPIntValue())) {
1356         // We're flipping all demanded bits. Flip the undemanded bits too.
1357         SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1358         return TLO.CombineTo(Op, New);
1359       }
1360     }
1361 
1362     // If we can't turn this into a 'not', try to shrink the constant.
1363     if (!C || !C->isAllOnesValue())
1364       if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1365         return true;
1366 
1367     Known ^= Known2;
1368     break;
1369   }
1370   case ISD::SELECT:
1371     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1372                              Depth + 1))
1373       return true;
1374     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1375                              Depth + 1))
1376       return true;
1377     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1378     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1379 
1380     // If the operands are constants, see if we can simplify them.
1381     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1382       return true;
1383 
1384     // Only known if known in both the LHS and RHS.
1385     Known = KnownBits::commonBits(Known, Known2);
1386     break;
1387   case ISD::SELECT_CC:
1388     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1389                              Depth + 1))
1390       return true;
1391     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1392                              Depth + 1))
1393       return true;
1394     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1395     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1396 
1397     // If the operands are constants, see if we can simplify them.
1398     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1399       return true;
1400 
1401     // Only known if known in both the LHS and RHS.
1402     Known = KnownBits::commonBits(Known, Known2);
1403     break;
1404   case ISD::SETCC: {
1405     SDValue Op0 = Op.getOperand(0);
1406     SDValue Op1 = Op.getOperand(1);
1407     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1408     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1409     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1410     // -1, we may be able to bypass the setcc.
1411     if (DemandedBits.isSignMask() &&
1412         Op0.getScalarValueSizeInBits() == BitWidth &&
1413         getBooleanContents(Op0.getValueType()) ==
1414             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1415       // If we're testing X < 0, then this compare isn't needed - just use X!
1416       // FIXME: We're limiting to integer types here, but this should also work
1417       // if we don't care about FP signed-zero. The use of SETLT with FP means
1418       // that we don't care about NaNs.
1419       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1420           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1421         return TLO.CombineTo(Op, Op0);
1422 
1423       // TODO: Should we check for other forms of sign-bit comparisons?
1424       // Examples: X <= -1, X >= 0
1425     }
1426     if (getBooleanContents(Op0.getValueType()) ==
1427             TargetLowering::ZeroOrOneBooleanContent &&
1428         BitWidth > 1)
1429       Known.Zero.setBitsFrom(1);
1430     break;
1431   }
  case ISD::SHL: {
    // Shift-left: the demanded bits of the source are the demanded bits of
    // the result shifted back down by the (constant) shift amount.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      // A zero shift amount is a no-op - use the shift source directly.
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift.  We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SRL) {
        if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SHL;
            // Net shift is |ShAmt - C1|; its direction depends on which of
            // the two constant amounts is larger.
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SRL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = Op0.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          EVT ShTy = getShiftAmountTy(InnerVT, DL);
          // Fall back to the inner type when the shift amount does not fit in
          // the preferred shift-amount type.
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
            ShTy = InnerVT;
          SDValue NarrowShl =
              TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
                              TLO.DAG.getConstant(ShAmt, dl, ShTy));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }

        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        // TODO - support non-uniform vector amounts.
        if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
            InnerOp.hasOneUse()) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
            unsigned InnerShAmt = SA2->getZExtValue();
            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
                DemandedBits.getActiveBits() <=
                    (InnerBits - InnerShAmt + ShAmt) &&
                DemandedBits.countTrailingZeros() >= ShAmt) {
              SDValue NewSA =
                  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
            }
          }
        }
      }

      // Recurse on the shift source with the demanded mask shifted down, then
      // translate the resulting known bits back up to the result position.
      APInt InDemandedMask = DemandedBits.lshr(ShAmt);
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);

      // Try shrinking the operation as long as the shift amount will still be
      // in range.
      if ((ShAmt < DemandedBits.getActiveBits()) &&
          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
        return true;
    }

    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      // UpperDemandedBits = number of bits from the top-most demanded bit
      // down to bit 0; if the source has enough sign bits left after
      // shifting, the shift doesn't change any demanded bit.
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return TLO.CombineTo(Op, Op0);
    }
    break;
  }
  case ISD::SRL: {
    // Logical shift-right: the demanded bits of the source are the demanded
    // bits of the result shifted back up by the (constant) shift amount.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      // A zero shift amount is a no-op - use the shift source directly.
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift.  We can do this if the top bits (which are shifted out)
      // are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SHL) {
        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SRL;
            // Net shift is |ShAmt - C1|; its direction depends on which of
            // the two constant amounts is larger.
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SHL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);
      // High bits known zero.
      Known.Zero.setHighBits(ShAmt);
    }
    break;
  }
  case ISD::SRA: {
    // Arithmetic shift-right: like SRL, but the vacated high bits are copies
    // of the source's sign bit, which enables extra simplifications.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
    if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
        NumHiDemandedBits)
      return TLO.CombineTo(Op, Op0);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable.  The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedBits.isOneValue())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      // A zero shift amount is a no-op - use the shift source directly.
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      if (DemandedBits.countLeadingZeros() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          DemandedBits.countLeadingZeros() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
      }

      // If only one bit is demanded and it lies in the sign-extended region,
      // it must be a copy of the sign bit - fetch it with a logical shift.
      int Log2 = DemandedBits.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
        SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    // Funnel shift: concatenates Op0:Op1 and shifts by Op2 (modulo BitWidth).
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    SDValue Op2 = Op.getOperand(2);
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);

      // For fshl, 0-shift returns the 1st arg.
      // For fshr, 0-shift returns the 2nd arg.
      if (Amt == 0) {
        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
                                 Known, TLO, Depth + 1))
          return true;
        break;
      }

      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
      // Each operand only contributes the bits its shift leaves in range.
      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;

      // Shift both halves' known bits into result position and merge; the
      // halves occupy disjoint bit ranges so the OR is conflict-free.
      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.One |= Known2.One;
      Known.Zero |= Known2.Zero;
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
                               Known2, TLO, Depth + 1))
        return true;
    }
    break;
  }
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
    if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, Op0);

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  case ISD::UMIN: {
    // Check if one arg is always less than (or equal) to the other arg.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    Known = KnownBits::umin(Known0, Known1);
    // If the known bits prove an unconditional unsigned ordering between the
    // operands, the min is statically one of them - replace the node.
    if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
      return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
    if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
      return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
    break;
  }
  case ISD::UMAX: {
    // Check if one arg is always greater than (or equal) to the other arg.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    Known = KnownBits::umax(Known0, Known1);
    // If the known bits prove an unconditional unsigned ordering between the
    // operands, the max is statically one of them - replace the node.
    if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
      return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
    if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
      return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
    break;
  }
  case ISD::BITREVERSE: {
    // Bit positions swap end-for-end, so reverse the demanded mask for the
    // source and reverse the resulting known bits back.
    SDValue Src = Op.getOperand(0);
    APInt DemandedSrcBits = DemandedBits.reverseBits();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.reverseBits();
    Known.Zero = Known2.Zero.reverseBits();
    break;
  }
  case ISD::BSWAP: {
    // Bytes swap end-for-end, so byte-swap the demanded mask for the source
    // and byte-swap the resulting known bits back.
    SDValue Src = Op.getOperand(0);
    APInt DemandedSrcBits = DemandedBits.byteSwap();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.byteSwap();
    Known.Zero = Known2.Zero.byteSwap();
    break;
  }
  case ISD::CTPOP: {
    // If only 1 bit is demanded, replace with PARITY as long as we're before
    // op legalization.
    // FIXME: Limit to scalars for now.
    if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
                                               Op.getOperand(0)));

    // Otherwise just report whatever computeKnownBits can prove.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // Sign-extend from the low ExVTBits bits in-register; bits above ExVTBits
    // become copies of bit ExVTBits-1.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
    if (DemandedBits.isSignMask()) {
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be getShiftAmountTy
        // for scalar types after legalization.
        EVT ShiftAmtTy = VT;
        if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
          ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);

        // Move the inner sign bit into the result sign bit with a single SHL.
        SDValue ShiftAmt =
            TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
      }
    }

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (DemandedBits.getActiveBits() <= ExVTBits)
      return TLO.CombineTo(Op, Op0);

    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits.setBit(ExVTBits - 1);

    if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (Known.Zero[ExVTBits - 1])
      return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));

    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
      Known.One.setBitsFrom(ExVTBits);
      Known.Zero &= Mask;
    } else { // Input sign bit unknown
      Known.Zero &= Mask;
      Known.One &= Mask;
    }
    break;
  }
  case ISD::BUILD_PAIR: {
    // Operand 0 supplies the low half, operand 1 the high half; split the
    // demanded mask accordingly and recombine the known bits.
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    // Reassemble: low half in place, high half shifted into position.
    Known.Zero = KnownLo.Zero.zext(BitWidth) |
                 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);

    Known.One = KnownLo.One.zext(BitWidth) |
                KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
    break;
  }
  case ISD::ZERO_EXTEND:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits() &&
          TLO.DAG.getDataLayout().isLittleEndian())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    // Recurse on the narrower source; the extension only demands the low
    // InBits of the mask (and, for *_VECTOR_INREG, the low InElts elements).
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    // Zero-extension makes all the new top bits known zero.
    Known = Known.zext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::SIGN_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits() &&
          TLO.DAG.getDataLayout().isLittleEndian())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InDemandedBits.setBit(InBits - 1);

    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");

    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative()) {
      unsigned Opc =
          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::ANY_EXTEND:
  case ISD::ANY_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;

    // If we only need the bottom element then we can just bitcast.
    // TODO: Handle ANY_EXTEND?
    if (IsVecInReg && DemandedElts == 1 &&
        VT.getSizeInBits() == SrcVT.getSizeInBits() &&
        TLO.DAG.getDataLayout().isLittleEndian())
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

    // Recurse on the narrower source value.
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    // The extended bits are undefined, so nothing is known about them.
    Known = Known.anyext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::TRUNCATE: {
    SDValue Src = Op.getOperand(0);

    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
    APInt TruncMask = DemandedBits.zext(OperandBitWidth);
    if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    Known = Known.trunc(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    if (Src.getNode()->hasOneUse()) {
      switch (Src.getOpcode()) {
      default:
        break;
      case ISD::SRL:
        // Shrink SRL by a constant if none of the high bits shifted in are
        // demanded.
        if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
          // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
          // undesirable.
          break;

        const APInt *ShAmtC =
            TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
        if (!ShAmtC || ShAmtC->uge(BitWidth))
          break;
        uint64_t ShVal = ShAmtC->getZExtValue();

        // HighBits = the wide-type bits that the truncate drops, as they land
        // after the shift, re-expressed in the narrow type.
        APInt HighBits =
            APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
        HighBits.lshrInPlace(ShVal);
        HighBits = HighBits.trunc(BitWidth);

        if (!(HighBits & DemandedBits)) {
          // None of the shifted in bits are needed.  Add a truncate of the
          // shift input, then shift it.
          SDValue NewShAmt = TLO.DAG.getConstant(
              ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
          SDValue NewTrunc =
              TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
        }
        break;
      }
    }

    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
  case ISD::AssertZext: {
    // AssertZext demands all of the high bits, plus any of the low bits
    // demanded by its users.
    EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
                             TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    // The bits above ZVT are asserted to be zero.
    Known.Zero |= ~InMask;
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Src = Op.getOperand(0);
    SDValue Idx = Op.getOperand(1);
    ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
    unsigned EltBitWidth = Src.getScalarValueSizeInBits();

    // Bail for scalable vectors - the element count is not a compile-time
    // constant, so per-element demanded masks cannot be formed.
    if (SrcEltCnt.isScalable())
      return false;

    // Demand the bits from every vector element without a constant index.
    unsigned NumSrcElts = SrcEltCnt.getFixedValue();
    APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
    if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
      if (CIdx->getAPIntValue().ult(NumSrcElts))
        DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());

    // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
    // anything about the extended bits.
    APInt DemandedSrcBits = DemandedBits;
    if (BitWidth > EltBitWidth)
      DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);

    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcBits.isAllOnesValue() ||
        !DemandedSrcElts.isAllOnesValue()) {
      if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known = Known2;
    if (BitWidth > EltBitWidth)
      Known = Known.anyext(BitWidth);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();

    // If this is an FP->Int bitcast and if the sign bit is the only
    // thing demanded, turn this into a FGETSIGN.
    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
        DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
        SrcVT.isFloatingPoint()) {
      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
          SrcVT != MVT::f128) {
        // Cannot eliminate/lower SHL for f128 yet.
        EVT Ty = OpVTLegal ? VT : MVT::i32;
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place.  We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
        if (!OpVTLegal && OpVTSizeInBits > 32)
          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
        unsigned ShVal = Op.getValueSizeInBits() - 1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
      }
    }

    // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
    // Demand the elt/bit if any of the original elts/bits are demanded.
    // TODO - bigendian once we have test coverage.
    if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
        TLO.DAG.getDataLayout().isLittleEndian()) {
      // Widening bitcast: each result element covers Scale source elements.
      unsigned Scale = BitWidth / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned Offset = i * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
        if (!Sub.isNullValue()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      APInt KnownSrcUndef, KnownSrcZero;
      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                     KnownSrcZero, TLO, Depth + 1))
        return true;

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    } else if ((NumSrcEltBits % BitWidth) == 0 &&
               TLO.DAG.getDataLayout().isLittleEndian()) {
      // Narrowing bitcast: Scale result elements fit in one source element.
      unsigned Scale = NumSrcEltBits / BitWidth;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * BitWidth;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SrcVT.isVector()) {
        APInt KnownSrcUndef, KnownSrcZero;
        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                       KnownSrcZero, TLO, Depth + 1))
          return true;
      }

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    }

    // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
    // recursive call where Known may be useful to the caller.
    if (Depth > 0) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    break;
  }
2197   case ISD::ADD:
2198   case ISD::MUL:
2199   case ISD::SUB: {
2200     // Add, Sub, and Mul don't demand any bits in positions beyond that
2201     // of the highest bit demanded of them.
2202     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2203     SDNodeFlags Flags = Op.getNode()->getFlags();
2204     unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
2205     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2206     if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
2207                              Depth + 1) ||
2208         SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
2209                              Depth + 1) ||
2210         // See if the operation should be performed at a smaller bit width.
2211         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2212       if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2213         // Disable the nsw and nuw flags. We can no longer guarantee that we
2214         // won't wrap after simplification.
2215         Flags.setNoSignedWrap(false);
2216         Flags.setNoUnsignedWrap(false);
2217         SDValue NewOp =
2218             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2219         return TLO.CombineTo(Op, NewOp);
2220       }
2221       return true;
2222     }
2223 
2224     // Attempt to avoid multi-use ops if we don't need anything from them.
2225     if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
2226       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2227           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2228       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2229           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2230       if (DemandedOp0 || DemandedOp1) {
2231         Flags.setNoSignedWrap(false);
2232         Flags.setNoUnsignedWrap(false);
2233         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2234         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2235         SDValue NewOp =
2236             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2237         return TLO.CombineTo(Op, NewOp);
2238       }
2239     }
2240 
2241     // If we have a constant operand, we may be able to turn it into -1 if we
2242     // do not demand the high bits. This can make the constant smaller to
2243     // encode, allow more general folding, or match specialized instruction
2244     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2245     // is probably not useful (and could be detrimental).
2246     ConstantSDNode *C = isConstOrConstSplat(Op1);
2247     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2248     if (C && !C->isAllOnesValue() && !C->isOne() &&
2249         (C->getAPIntValue() | HighMask).isAllOnesValue()) {
2250       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2251       // Disable the nsw and nuw flags. We can no longer guarantee that we
2252       // won't wrap after simplification.
2253       Flags.setNoSignedWrap(false);
2254       Flags.setNoUnsignedWrap(false);
2255       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2256       return TLO.CombineTo(Op, NewOp);
2257     }
2258 
2259     LLVM_FALLTHROUGH;
2260   }
2261   default:
2262     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2263       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2264                                             Known, TLO, Depth))
2265         return true;
2266       break;
2267     }
2268 
2269     // Just use computeKnownBits to compute output bits.
2270     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2271     break;
2272   }
2273 
2274   // If we know the value of all of the demanded bits, return this as a
2275   // constant.
2276   if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2277     // Avoid folding to a constant if any OpaqueConstant is involved.
2278     const SDNode *N = Op.getNode();
2279     for (SDNode *Op :
2280          llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2281       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2282         if (C->isOpaque())
2283           return false;
2284     }
2285     if (VT.isInteger())
2286       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2287     if (VT.isFloatingPoint())
2288       return TLO.CombineTo(
2289           Op,
2290           TLO.DAG.getConstantFP(
2291               APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2292   }
2293 
2294   return false;
2295 }
2296 
2297 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2298                                                 const APInt &DemandedElts,
2299                                                 APInt &KnownUndef,
2300                                                 APInt &KnownZero,
2301                                                 DAGCombinerInfo &DCI) const {
2302   SelectionDAG &DAG = DCI.DAG;
2303   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2304                         !DCI.isBeforeLegalizeOps());
2305 
2306   bool Simplified =
2307       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2308   if (Simplified) {
2309     DCI.AddToWorklist(Op.getNode());
2310     DCI.CommitTargetLoweringOpt(TLO);
2311   }
2312 
2313   return Simplified;
2314 }
2315 
2316 /// Given a vector binary operation and known undefined elements for each input
2317 /// operand, compute whether each element of the output is undefined.
2318 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2319                                          const APInt &UndefOp0,
2320                                          const APInt &UndefOp1) {
2321   EVT VT = BO.getValueType();
2322   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2323          "Vector binop only");
2324 
2325   EVT EltVT = VT.getVectorElementType();
2326   unsigned NumElts = VT.getVectorNumElements();
2327   assert(UndefOp0.getBitWidth() == NumElts &&
2328          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2329 
2330   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2331                                    const APInt &UndefVals) {
2332     if (UndefVals[Index])
2333       return DAG.getUNDEF(EltVT);
2334 
2335     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2336       // Try hard to make sure that the getNode() call is not creating temporary
2337       // nodes. Ignore opaque integers because they do not constant fold.
2338       SDValue Elt = BV->getOperand(Index);
2339       auto *C = dyn_cast<ConstantSDNode>(Elt);
2340       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2341         return Elt;
2342     }
2343 
2344     return SDValue();
2345   };
2346 
2347   APInt KnownUndef = APInt::getNullValue(NumElts);
2348   for (unsigned i = 0; i != NumElts; ++i) {
2349     // If both inputs for this element are either constant or undef and match
2350     // the element type, compute the constant/undef result for this element of
2351     // the vector.
2352     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2353     // not handle FP constants. The code within getNode() should be refactored
2354     // to avoid the danger of creating a bogus temporary node here.
2355     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2356     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2357     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2358       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2359         KnownUndef.setBit(i);
2360   }
2361   return KnownUndef;
2362 }
2363 
/// Recursive worker that tries to simplify \p Op given that only the elements
/// in \p OriginalDemandedElts are required. Returns true if a replacement was
/// committed to \p TLO. On a false return, KnownUndef/KnownZero have one bit
/// per vector element set when that element is known undef / known zero.
/// \p AssumeSingleUse allows the caller to demand a subset of elements even
/// when \p Op has multiple uses.
bool TargetLowering::SimplifyDemandedVectorElts(
    SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  APInt DemandedElts = OriginalDemandedElts;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");

  // Start with nothing known about any element.
  KnownUndef = KnownZero = APInt::getNullValue(NumElts);

  // TODO: For now we assume we know nothing about scalable vectors.
  if (VT.isScalableVector())
    return false;

  assert(VT.getVectorNumElements() == NumElts &&
         "Mask size mismatches value type element count!");

  // Undef operand.
  if (Op.isUndef()) {
    KnownUndef.setAllBits();
    return false;
  }

  // If Op has other users, assume that all elements are needed.
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
    DemandedElts.setAllBits();

  // Not demanding any elements from Op.
  if (DemandedElts == 0) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  // Helper for demanding the specified elements and all the bits of both binary
  // operands.
  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
    SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    if (NewOp0 || NewOp1) {
      SDValue NewOp = TLO.DAG.getNode(
          Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
      return TLO.CombineTo(Op, NewOp);
    }
    return false;
  };

  switch (Opcode) {
  case ISD::SCALAR_TO_VECTOR: {
    // Only element 0 carries a defined value; the rest are implicitly undef.
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    }
    KnownUndef.setHighBits(NumElts - 1);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();

    // We only handle vectors here.
    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
    if (!SrcVT.isVector())
      break;

    // Fast handling of 'identity' bitcasts.
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    if (NumSrcElts == NumElts)
      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
                                        KnownZero, TLO, Depth + 1);

    APInt SrcZero, SrcUndef;
    APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);

    // Bitcast from 'large element' src vector to 'small element' vector, we
    // must demand a source element if any DemandedElt maps to it.
    if ((NumElts % NumSrcElts) == 0) {
      unsigned Scale = NumElts / NumSrcElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBit(i / Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // Try calling SimplifyDemandedBits, converting demanded elts to the bits
      // of the large element.
      // TODO - bigendian once we have test coverage.
      if (TLO.DAG.getDataLayout().isLittleEndian()) {
        unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
        APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
        for (unsigned i = 0; i != NumElts; ++i)
          if (DemandedElts[i]) {
            unsigned Ofs = (i % Scale) * EltSizeInBits;
            SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
          }

        KnownBits Known;
        if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
                                 TLO, Depth + 1))
          return true;
      }

      // If the src element is zero/undef then all the output elements will be -
      // only demanded elements are guaranteed to be correct.
      for (unsigned i = 0; i != NumSrcElts; ++i) {
        if (SrcDemandedElts[i]) {
          if (SrcZero[i])
            KnownZero.setBits(i * Scale, (i + 1) * Scale);
          if (SrcUndef[i])
            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
        }
      }
    }

    // Bitcast from 'small element' src vector to 'large element' vector, we
    // demand all smaller source elements covered by the larger demanded element
    // of this vector.
    if ((NumSrcElts % NumElts) == 0) {
      unsigned Scale = NumSrcElts / NumElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // If all the src elements covering an output element are zero/undef, then
      // the output element will be as well, assuming it was demanded.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i]) {
          if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownZero.setBit(i);
          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownUndef.setBit(i);
        }
      }
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    // Check all elements and simplify any unused elements with UNDEF.
    if (!DemandedElts.isAllOnesValue()) {
      // Don't simplify BROADCASTS.
      if (llvm::any_of(Op->op_values(),
                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
        bool Updated = false;
        for (unsigned i = 0; i != NumElts; ++i) {
          // Replace each non-demanded, non-undef operand with UNDEF.
          if (!DemandedElts[i] && !Ops[i].isUndef()) {
            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
            KnownUndef.setBit(i);
            Updated = true;
          }
        }
        if (Updated)
          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
      }
    }
    // Record per-element undef/zero facts directly from the operands.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue SrcOp = Op.getOperand(i);
      if (SrcOp.isUndef()) {
        KnownUndef.setBit(i);
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
                 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
        KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    // Simplify each subvector against its slice of the demanded mask, then
    // stitch the per-subvector known bits back together.
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      SDValue SubOp = Op.getOperand(i);
      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      APInt SubUndef, SubZero;
      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
                                     Depth + 1))
        return true;
      KnownUndef.insertBits(SubUndef, i * NumSubElts);
      KnownZero.insertBits(SubZero, i * NumSubElts);
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src its
    // inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    // Elements covered by the inserted subvector are not demanded from Src.
    DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);

    APInt SubUndef, SubZero;
    if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
                                   Depth + 1))
      return true;

    // If none of the src operand elements are demanded, replace it with undef.
    if (!DemandedSrcElts && !Src.isUndef())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                                               TLO.DAG.getUNDEF(VT), Sub,
                                               Op.getOperand(2)));

    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;
    KnownUndef.insertBits(SubUndef, Idx);
    KnownZero.insertBits(SubZero, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcElts.isAllOnesValue() ||
        !DemandedSubElts.isAllOnesValue()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
          Sub, DemandedSubElts, TLO.DAG, Depth + 1);
      if (NewSrc || NewSub) {
        NewSrc = NewSrc ? NewSrc : Src;
        NewSub = NewSub ? NewSub : Sub;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        NewSub, Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    // Known facts for the result are the source's facts shifted down by Idx.
    KnownUndef = SrcUndef.extractBits(NumElts, Idx);
    KnownZero = SrcZero.extractBits(NumElts, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnesValue()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));

    // For a legal, constant insertion index, if we don't need this insertion
    // then strip it, else remove it from the demanded elts.
    if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
      unsigned Idx = CIdx->getZExtValue();
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);

      APInt DemandedVecElts(DemandedElts);
      DemandedVecElts.clearBit(Idx);
      if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
                                     KnownZero, TLO, Depth + 1))
        return true;

      // The inserted lane's undef/zero state comes from the scalar operand.
      KnownUndef.setBitVal(Idx, Scl.isUndef());

      KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
      break;
    }

    // Variable insertion index: any lane may be written, so demand them all.
    APInt VecUndef, VecZero;
    if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
                                   Depth + 1))
      return true;
    // Without knowing the insertion index we can't set KnownUndef/KnownZero.
    break;
  }
  case ISD::VSELECT: {
    // Try to transform the select condition based on the current demanded
    // elements.
    // TODO: If a condition element is undef, we can choose from one arm of the
    //       select (and if one arm is undef, then we can propagate that to the
    //       result).
    // TODO - add support for constant vselect masks (see IR version of this).
    APInt UnusedUndef, UnusedZero;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
                                   UnusedZero, TLO, Depth + 1))
      return true;

    // See if we can simplify either vselect operand.
    APInt DemandedLHS(DemandedElts);
    APInt DemandedRHS(DemandedElts);
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
      return true;

    // A result element is undef/zero only if both arms agree.
    KnownUndef = UndefLHS & UndefRHS;
    KnownZero = ZeroLHS & ZeroRHS;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands..
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    // See if we can simplify either shuffle operand.
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
      return true;

    // Simplify mask using undef elements from LHS/RHS.
    bool Updated = false;
    bool IdentityLHS = true, IdentityRHS = true;
    SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
    for (unsigned i = 0; i != NumElts; ++i) {
      int &M = NewMask[i];
      if (M < 0)
        continue;
      // Turn lanes that are not demanded, or that read an undef source lane,
      // into sentinel (undef) mask entries.
      if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
          (M >= (int)NumElts && UndefRHS[M - NumElts])) {
        Updated = true;
        M = -1;
      }
      IdentityLHS &= (M < 0) || (M == (int)i);
      IdentityRHS &= (M < 0) || ((M - NumElts) == i);
    }

    // Update legal shuffle masks based on demanded elements if it won't reduce
    // to Identity which can cause premature removal of the shuffle mask.
    if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
      SDValue LegalShuffle =
          buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
                                  NewMask, TLO.DAG);
      if (LegalShuffle)
        return TLO.CombineTo(Op, LegalShuffle);
    }

    // Propagate undef/zero elements from LHS/RHS.
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0) {
        KnownUndef.setBit(i);
      } else if (M < (int)NumElts) {
        if (UndefLHS[M])
          KnownUndef.setBit(i);
        if (ZeroLHS[M])
          KnownZero.setBit(i);
      } else {
        if (UndefRHS[M - NumElts])
          KnownUndef.setBit(i);
        if (ZeroRHS[M - NumElts])
          KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    APInt SrcUndef, SrcZero;
    SDValue Src = Op.getOperand(0);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    // The result only uses the low NumElts elements of the (wider) source.
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownZero = SrcZero.zextOrTrunc(NumElts);
    KnownUndef = SrcUndef.zextOrTrunc(NumElts);

    if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
        Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
        DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
      // aext - if we just need the bottom element then we can bitcast.
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
    }

    if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();
    }
    break;
  }

  // TODO: There are more binop opcodes that could be handled here - MIN,
  // MAX, saturated math, etc.
  case ISD::OR:
  case ISD::XOR:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    // Result element is zero only if both operand elements are zero; undef
    // propagation is delegated to per-element constant folding.
    KnownZero = ZeroLHS & ZeroRHS;
    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    // Shifting/rotating a zero element yields zero regardless of the amount.
    KnownZero = ZeroLHS;
    KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::MUL:
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;

    // If either side has a zero element, then the result element is zero, even
    // if the other is an UNDEF.
    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
    // and then handle 'and' nodes with the rest of the binop opcodes.
    KnownZero |= SrcZero;
    KnownUndef &= SrcUndef;
    // Known-zero wins over known-undef for the same element (see assert below).
    KnownUndef &= ~KnownZero;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::TRUNCATE:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    // Element-wise conversions: demanded elements map straight through.
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
                                   KnownZero, TLO, Depth + 1))
      return true;

    if (Op.getOpcode() == ISD::ZERO_EXTEND) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();
    }
    break;
  default: {
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
      // Give the target a chance to simplify its own nodes.
      if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
                                                  KnownZero, TLO, Depth))
        return true;
    } else {
      // Fall back to bit-level simplification, demanding all bits of each
      // demanded element.
      KnownBits Known;
      APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
      if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
                               TLO, Depth, AssumeSingleUse))
        return true;
    }
    break;
  }
  }
  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");

  // Constant fold all undef cases.
  // TODO: Handle zero cases as well.
  if (DemandedElts.isSubsetOf(KnownUndef))
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

  return false;
}
2922 
2923 /// Determine which of the bits specified in Mask are known to be either zero or
2924 /// one and return them in the Known.
2925 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2926                                                    KnownBits &Known,
2927                                                    const APInt &DemandedElts,
2928                                                    const SelectionDAG &DAG,
2929                                                    unsigned Depth) const {
2930   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2931           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2932           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2933           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2934          "Should use MaskedValueIsZero if you don't know whether Op"
2935          " is a target node!");
2936   Known.resetAll();
2937 }
2938 
// GlobalISel counterpart of computeKnownBitsForTargetNode: targets override
// this to describe the known bits of a target-specific instruction defining
// register R. The default conservatively reports that nothing is known.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  Known.resetAll();
}
2945 
void TargetLowering::computeKnownBitsForFrameIndex(
  const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
  // The low bits are known zero if the pointer is aligned: a frame-index
  // address is at least as aligned as the frame object it refers to, so the
  // low log2(alignment) bits of the address must be zero.
  Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
}
2951 
// Default alignment query for GlobalISel: targets that can prove a larger
// alignment for values produced by their own instructions override this.
// Align(1) means "no alignment information known".
Align TargetLowering::computeKnownAlignForTargetInstr(
  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  unsigned Depth) const {
  return Align(1);
}
2957 
2958 /// This method can be implemented by targets that want to expose additional
2959 /// information about sign bits to the DAG Combiner.
2960 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2961                                                          const APInt &,
2962                                                          const SelectionDAG &,
2963                                                          unsigned Depth) const {
2964   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2965           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2966           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2967           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2968          "Should use ComputeNumSignBits if you don't know whether Op"
2969          " is a target node!");
2970   return 1;
2971 }
2972 
// GlobalISel counterpart of ComputeNumSignBitsForTargetNode; the default
// returns the minimum possible answer (every value has at least 1 sign bit).
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  return 1;
}
2978 
2979 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
2980     SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
2981     TargetLoweringOpt &TLO, unsigned Depth) const {
2982   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2983           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2984           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2985           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2986          "Should use SimplifyDemandedVectorElts if you don't know whether Op"
2987          " is a target node!");
2988   return false;
2989 }
2990 
2991 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
2992     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2993     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
2994   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2995           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2996           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2997           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2998          "Should use SimplifyDemandedBits if you don't know whether Op"
2999          " is a target node!");
3000   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3001   return false;
3002 }
3003 
3004 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3005     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3006     SelectionDAG &DAG, unsigned Depth) const {
3007   assert(
3008       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3009        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3010        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3011        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3012       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3013       " is a target node!");
3014   return SDValue();
3015 }
3016 
3017 SDValue
3018 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3019                                         SDValue N1, MutableArrayRef<int> Mask,
3020                                         SelectionDAG &DAG) const {
3021   bool LegalMask = isShuffleMaskLegal(Mask, VT);
3022   if (!LegalMask) {
3023     std::swap(N0, N1);
3024     ShuffleVectorSDNode::commuteMask(Mask);
3025     LegalMask = isShuffleMaskLegal(Mask, VT);
3026   }
3027 
3028   if (!LegalMask)
3029     return SDValue();
3030 
3031   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3032 }
3033 
// Targets may override this to recover the IR Constant backing a
// target-specific load node; the base class cannot, so it returns null.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
3037 
3038 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3039                                                   const SelectionDAG &DAG,
3040                                                   bool SNaN,
3041                                                   unsigned Depth) const {
3042   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3043           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3044           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3045           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3046          "Should use isKnownNeverNaN if you don't know whether Op"
3047          " is a target node!");
3048   return false;
3049 }
3050 
3051 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3052 // work with truncating build vectors and vectors with elements of less than
3053 // 8 bits.
3054 bool TargetLowering::isConstTrueVal(const SDNode *N) const {
3055   if (!N)
3056     return false;
3057 
3058   APInt CVal;
3059   if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
3060     CVal = CN->getAPIntValue();
3061   } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
3062     auto *CN = BV->getConstantSplatNode();
3063     if (!CN)
3064       return false;
3065 
3066     // If this is a truncating build vector, truncate the splat value.
3067     // Otherwise, we may fail to match the expected values below.
3068     unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
3069     CVal = CN->getAPIntValue();
3070     if (BVEltWidth < CVal.getBitWidth())
3071       CVal = CVal.trunc(BVEltWidth);
3072   } else {
3073     return false;
3074   }
3075 
3076   switch (getBooleanContents(N->getValueType(0))) {
3077   case UndefinedBooleanContent:
3078     return CVal[0];
3079   case ZeroOrOneBooleanContent:
3080     return CVal.isOneValue();
3081   case ZeroOrNegativeOneBooleanContent:
3082     return CVal.isAllOnesValue();
3083   }
3084 
3085   llvm_unreachable("Invalid boolean contents");
3086 }
3087 
3088 bool TargetLowering::isConstFalseVal(const SDNode *N) const {
3089   if (!N)
3090     return false;
3091 
3092   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3093   if (!CN) {
3094     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3095     if (!BV)
3096       return false;
3097 
3098     // Only interested in constant splats, we don't care about undef
3099     // elements in identifying boolean constants and getConstantSplatNode
3100     // returns NULL if all ops are undef;
3101     CN = BV->getConstantSplatNode();
3102     if (!CN)
3103       return false;
3104   }
3105 
3106   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3107     return !CN->getAPIntValue()[0];
3108 
3109   return CN->isNullValue();
3110 }
3111 
3112 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3113                                        bool SExt) const {
3114   if (VT == MVT::i1)
3115     return N->isOne();
3116 
3117   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3118   switch (Cnt) {
3119   case TargetLowering::ZeroOrOneBooleanContent:
3120     // An extended value of 1 is always true, unless its original type is i1,
3121     // in which case it will be sign extended to -1.
3122     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3123   case TargetLowering::UndefinedBooleanContent:
3124   case TargetLowering::ZeroOrNegativeOneBooleanContent:
3125     return N->isAllOnesValue() && SExt;
3126   }
3127   llvm_unreachable("Unexpected enumeration.");
3128 }
3129 
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
/// Only handles integer SETEQ/SETNE compares; returns an empty SDValue when
/// no profitable fold applies.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  // Canonicalize the 'and' (if any) onto the LHS of the compare.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  EVT OpVT = N0.getValueType();
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // Identify which AND operand is the repeated value Y (equal to N1); the
  // other operand becomes X.
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  SelectionDAG &DAG = DCI.DAG;
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    auto *YConst = dyn_cast<ConstantSDNode>(Y);
    if (YConst && YConst->isNullValue())
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
3190 
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Normalize the condition to SETEQ/SETNE, adjusting the setcc constant so
  // that the non-strict forms become strict (x u<= C  ==  x u< C+1).
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  // The two powers of two must be adjacent (1<<KeptBits, 1<<(KeptBits-1)),
  // exactly as the pattern in the header comment requires.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into:  ((%x << C) a>> C) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);

  return T2;
}
3288 
// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
// N1C must be a constant zero; only SETEQ/SETNE are valid (asserted below).
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) &&
         isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Look for '(C l>>/<< Y)'. On success, fills in C, Y and NewShiftOpcode
  // (the opposite logical shift that will be applied to X instead).
  // Note: reads X, which must already hold the other AND operand.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    // Let the target decide whether this rewrite is actually profitable.
    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
3359 
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
/// Returns an empty SDValue when no fold applies.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                                           ISD::CondCode Cond, const SDLoc &DL,
                                           DAGCombinerInfo &DCI) const {
  unsigned BOpcode = N0.getOpcode();
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
         "Unexpected binop");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");

  // (X + Y) == X --> Y == 0
  // (X - Y) == X --> Y == 0
  // (X ^ Y) == X --> Y == 0
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  SDValue X = N0.getOperand(0);
  SDValue Y = N0.getOperand(1);
  if (X == N1)
    return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);

  if (Y != N1)
    return SDValue();

  // (X + Y) == Y --> X == 0
  // (X ^ Y) == Y --> X == 0
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
    return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);

  // Only SUB remains. Require one use (we create new nodes below).
  // The shift would not be valid if the operands are boolean (i1).
  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
    return SDValue();

  // (X - Y) == Y --> X == Y << 1
  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
                                 !DCI.isBeforeLegalize());
  SDValue One = DAG.getConstant(1, DL, ShiftVT);
  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
  if (!DCI.isCalledByLegalizer())
    DCI.AddToWorklist(YShl1.getNode());
  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}
3402 
/// Fold comparisons of a (possibly truncated) CTPOP result against a small
/// constant into bit tricks based on x & (x - 1), which clears the lowest
/// set bit of x. Returns an empty SDValue when no fold applies.
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // If this is a vector CTPOP, keep the CTPOP if it is legal.
  // TODO: Should we check if CTPOP is legal(or custom) for scalars?
  if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
    return SDValue();

  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Only expand if the chain of (x &= x - 1) passes stays within the
    // target's cost limit for a custom ctpop expansion.
    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    // Each pass clears the lowest set bit, so after 'Passes' iterations the
    // result is zero iff the population count was within the bound.
    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // If ctpop is not supported, expand a power-of-2 comparison based on it.
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // For scalars, keep CTPOP if it is legal or custom.
    if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
      return SDValue();
    // This is based on X86's custom lowering for CTPOP which produces more
    // instructions than the expansion here.

    // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
    // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
    SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
    SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
    SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
    unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
    return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
  }

  return SDValue();
}
3470 
3471 /// Try to simplify a setcc built with the specified operands and cc. If it is
3472 /// unable to simplify it, return a null SDValue.
3473 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
3474                                       ISD::CondCode Cond, bool foldBooleans,
3475                                       DAGCombinerInfo &DCI,
3476                                       const SDLoc &dl) const {
3477   SelectionDAG &DAG = DCI.DAG;
3478   const DataLayout &Layout = DAG.getDataLayout();
3479   EVT OpVT = N0.getValueType();
3480 
3481   // Constant fold or commute setcc.
3482   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
3483     return Fold;
3484 
3485   // Ensure that the constant occurs on the RHS and fold constant comparisons.
3486   // TODO: Handle non-splat vector constants. All undef causes trouble.
3487   // FIXME: We can't yet fold constant scalable vector splats, so avoid an
3488   // infinite loop here when we encounter one.
3489   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
3490   if (isConstOrConstSplat(N0) &&
3491       (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
3492       (DCI.isBeforeLegalizeOps() ||
3493        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
3494     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3495 
3496   // If we have a subtract with the same 2 non-constant operands as this setcc
3497   // -- but in reverse order -- then try to commute the operands of this setcc
3498   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
3499   // instruction on some targets.
3500   if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
3501       (DCI.isBeforeLegalizeOps() ||
3502        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
3503       DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
3504       !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
3505     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3506 
3507   if (auto *N1C = isConstOrConstSplat(N1)) {
3508     const APInt &C1 = N1C->getAPIntValue();
3509 
3510     // Optimize some CTPOP cases.
3511     if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
3512       return V;
3513 
3514     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
3515     // equality comparison, then we're just comparing whether X itself is
3516     // zero.
3517     if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
3518         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
3519         isPowerOf2_32(N0.getScalarValueSizeInBits())) {
3520       if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
3521         if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3522             ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
3523           if ((C1 == 0) == (Cond == ISD::SETEQ)) {
3524             // (srl (ctlz x), 5) == 0  -> X != 0
3525             // (srl (ctlz x), 5) != 1  -> X != 0
3526             Cond = ISD::SETNE;
3527           } else {
3528             // (srl (ctlz x), 5) != 0  -> X == 0
3529             // (srl (ctlz x), 5) == 1  -> X == 0
3530             Cond = ISD::SETEQ;
3531           }
3532           SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
3533           return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
3534                               Cond);
3535         }
3536       }
3537     }
3538   }
3539 
3540   // FIXME: Support vectors.
3541   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3542     const APInt &C1 = N1C->getAPIntValue();
3543 
3544     // (zext x) == C --> x == (trunc C)
3545     // (sext x) == C --> x == (trunc C)
3546     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3547         DCI.isBeforeLegalize() && N0->hasOneUse()) {
3548       unsigned MinBits = N0.getValueSizeInBits();
3549       SDValue PreExt;
3550       bool Signed = false;
3551       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
3552         // ZExt
3553         MinBits = N0->getOperand(0).getValueSizeInBits();
3554         PreExt = N0->getOperand(0);
3555       } else if (N0->getOpcode() == ISD::AND) {
3556         // DAGCombine turns costly ZExts into ANDs
3557         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
3558           if ((C->getAPIntValue()+1).isPowerOf2()) {
3559             MinBits = C->getAPIntValue().countTrailingOnes();
3560             PreExt = N0->getOperand(0);
3561           }
3562       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
3563         // SExt
3564         MinBits = N0->getOperand(0).getValueSizeInBits();
3565         PreExt = N0->getOperand(0);
3566         Signed = true;
3567       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
3568         // ZEXTLOAD / SEXTLOAD
3569         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
3570           MinBits = LN0->getMemoryVT().getSizeInBits();
3571           PreExt = N0;
3572         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
3573           Signed = true;
3574           MinBits = LN0->getMemoryVT().getSizeInBits();
3575           PreExt = N0;
3576         }
3577       }
3578 
3579       // Figure out how many bits we need to preserve this constant.
3580       unsigned ReqdBits = Signed ?
3581         C1.getBitWidth() - C1.getNumSignBits() + 1 :
3582         C1.getActiveBits();
3583 
3584       // Make sure we're not losing bits from the constant.
3585       if (MinBits > 0 &&
3586           MinBits < C1.getBitWidth() &&
3587           MinBits >= ReqdBits) {
3588         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
3589         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
3590           // Will get folded away.
3591           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
3592           if (MinBits == 1 && C1 == 1)
3593             // Invert the condition.
3594             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
3595                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3596           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
3597           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
3598         }
3599 
3600         // If truncating the setcc operands is not desirable, we can still
3601         // simplify the expression in some cases:
3602         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
3603         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
3604         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
3605         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
3606         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
3607         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
3608         SDValue TopSetCC = N0->getOperand(0);
3609         unsigned N0Opc = N0->getOpcode();
3610         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
3611         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
3612             TopSetCC.getOpcode() == ISD::SETCC &&
3613             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
3614             (isConstFalseVal(N1C) ||
3615              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
3616 
3617           bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
3618                          (!N1C->isNullValue() && Cond == ISD::SETNE);
3619 
3620           if (!Inverse)
3621             return TopSetCC;
3622 
3623           ISD::CondCode InvCond = ISD::getSetCCInverse(
3624               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
3625               TopSetCC.getOperand(0).getValueType());
3626           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
3627                                       TopSetCC.getOperand(1),
3628                                       InvCond);
3629         }
3630       }
3631     }
3632 
3633     // If the LHS is '(and load, const)', the RHS is 0, the test is for
3634     // equality or unsigned, and all 1 bits of the const are in the same
3635     // partial word, see if we can shorten the load.
3636     if (DCI.isBeforeLegalize() &&
3637         !ISD::isSignedIntSetCC(Cond) &&
3638         N0.getOpcode() == ISD::AND && C1 == 0 &&
3639         N0.getNode()->hasOneUse() &&
3640         isa<LoadSDNode>(N0.getOperand(0)) &&
3641         N0.getOperand(0).getNode()->hasOneUse() &&
3642         isa<ConstantSDNode>(N0.getOperand(1))) {
3643       LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
3644       APInt bestMask;
3645       unsigned bestWidth = 0, bestOffset = 0;
3646       if (Lod->isSimple() && Lod->isUnindexed()) {
3647         unsigned origWidth = N0.getValueSizeInBits();
3648         unsigned maskWidth = origWidth;
3649         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
3650         // 8 bits, but have to be careful...
3651         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
3652           origWidth = Lod->getMemoryVT().getSizeInBits();
3653         const APInt &Mask = N0.getConstantOperandAPInt(1);
3654         for (unsigned width = origWidth / 2; width>=8; width /= 2) {
3655           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
3656           for (unsigned offset=0; offset<origWidth/width; offset++) {
3657             if (Mask.isSubsetOf(newMask)) {
3658               if (Layout.isLittleEndian())
3659                 bestOffset = (uint64_t)offset * (width/8);
3660               else
3661                 bestOffset = (origWidth/width - offset - 1) * (width/8);
3662               bestMask = Mask.lshr(offset * (width/8) * 8);
3663               bestWidth = width;
3664               break;
3665             }
3666             newMask <<= width;
3667           }
3668         }
3669       }
3670       if (bestWidth) {
3671         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
3672         if (newVT.isRound() &&
3673             shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
3674           SDValue Ptr = Lod->getBasePtr();
3675           if (bestOffset != 0)
3676             Ptr =
3677                 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
3678           SDValue NewLoad =
3679               DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
3680                           Lod->getPointerInfo().getWithOffset(bestOffset),
3681                           Lod->getOriginalAlign());
3682           return DAG.getSetCC(dl, VT,
3683                               DAG.getNode(ISD::AND, dl, newVT, NewLoad,
3684                                       DAG.getConstant(bestMask.trunc(bestWidth),
3685                                                       dl, newVT)),
3686                               DAG.getConstant(0LL, dl, newVT), Cond);
3687         }
3688       }
3689     }
3690 
3691     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
3692     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
3693       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
3694 
3695       // If the comparison constant has bits in the upper part, the
3696       // zero-extended value could never match.
3697       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
3698                                               C1.getBitWidth() - InSize))) {
3699         switch (Cond) {
3700         case ISD::SETUGT:
3701         case ISD::SETUGE:
3702         case ISD::SETEQ:
3703           return DAG.getConstant(0, dl, VT);
3704         case ISD::SETULT:
3705         case ISD::SETULE:
3706         case ISD::SETNE:
3707           return DAG.getConstant(1, dl, VT);
3708         case ISD::SETGT:
3709         case ISD::SETGE:
3710           // True if the sign bit of C1 is set.
3711           return DAG.getConstant(C1.isNegative(), dl, VT);
3712         case ISD::SETLT:
3713         case ISD::SETLE:
3714           // True if the sign bit of C1 isn't set.
3715           return DAG.getConstant(C1.isNonNegative(), dl, VT);
3716         default:
3717           break;
3718         }
3719       }
3720 
3721       // Otherwise, we can perform the comparison with the low bits.
3722       switch (Cond) {
3723       case ISD::SETEQ:
3724       case ISD::SETNE:
3725       case ISD::SETUGT:
3726       case ISD::SETUGE:
3727       case ISD::SETULT:
3728       case ISD::SETULE: {
3729         EVT newVT = N0.getOperand(0).getValueType();
3730         if (DCI.isBeforeLegalizeOps() ||
3731             (isOperationLegal(ISD::SETCC, newVT) &&
3732              isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
3733           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
3734           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
3735 
3736           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
3737                                           NewConst, Cond);
3738           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
3739         }
3740         break;
3741       }
3742       default:
3743         break; // todo, be more careful with signed comparisons
3744       }
3745     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3746                (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3747                !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
3748                                       OpVT)) {
3749       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
3750       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
3751       EVT ExtDstTy = N0.getValueType();
3752       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
3753 
3754       // If the constant doesn't fit into the number of bits for the source of
3755       // the sign extension, it is impossible for both sides to be equal.
3756       if (C1.getMinSignedBits() > ExtSrcTyBits)
3757         return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
3758 
3759       assert(ExtDstTy == N0.getOperand(0).getValueType() &&
3760              ExtDstTy != ExtSrcTy && "Unexpected types!");
3761       APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
3762       SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
3763                                    DAG.getConstant(Imm, dl, ExtDstTy));
3764       if (!DCI.isCalledByLegalizer())
3765         DCI.AddToWorklist(ZextOp.getNode());
3766       // Otherwise, make this a use of a zext.
3767       return DAG.getSetCC(dl, VT, ZextOp,
3768                           DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
3769     } else if ((N1C->isNullValue() || N1C->isOne()) &&
3770                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3771       // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
3772       if (N0.getOpcode() == ISD::SETCC &&
3773           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
3774           (N0.getValueType() == MVT::i1 ||
3775            getBooleanContents(N0.getOperand(0).getValueType()) ==
3776                        ZeroOrOneBooleanContent)) {
3777         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
3778         if (TrueWhenTrue)
3779           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
3780         // Invert the condition.
3781         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
3782         CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
3783         if (DCI.isBeforeLegalizeOps() ||
3784             isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
3785           return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
3786       }
3787 
3788       if ((N0.getOpcode() == ISD::XOR ||
3789            (N0.getOpcode() == ISD::AND &&
3790             N0.getOperand(0).getOpcode() == ISD::XOR &&
3791             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
3792           isOneConstant(N0.getOperand(1))) {
3793         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
3794         // can only do this if the top bits are known zero.
3795         unsigned BitWidth = N0.getValueSizeInBits();
3796         if (DAG.MaskedValueIsZero(N0,
3797                                   APInt::getHighBitsSet(BitWidth,
3798                                                         BitWidth-1))) {
3799           // Okay, get the un-inverted input value.
3800           SDValue Val;
3801           if (N0.getOpcode() == ISD::XOR) {
3802             Val = N0.getOperand(0);
3803           } else {
3804             assert(N0.getOpcode() == ISD::AND &&
3805                     N0.getOperand(0).getOpcode() == ISD::XOR);
3806             // ((X^1)&1)^1 -> X & 1
3807             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
3808                               N0.getOperand(0).getOperand(0),
3809                               N0.getOperand(1));
3810           }
3811 
3812           return DAG.getSetCC(dl, VT, Val, N1,
3813                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3814         }
3815       } else if (N1C->isOne()) {
3816         SDValue Op0 = N0;
3817         if (Op0.getOpcode() == ISD::TRUNCATE)
3818           Op0 = Op0.getOperand(0);
3819 
3820         if ((Op0.getOpcode() == ISD::XOR) &&
3821             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
3822             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
3823           SDValue XorLHS = Op0.getOperand(0);
3824           SDValue XorRHS = Op0.getOperand(1);
3825           // Ensure that the input setccs return an i1 type or 0/1 value.
3826           if (Op0.getValueType() == MVT::i1 ||
3827               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
3828                       ZeroOrOneBooleanContent &&
3829                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
3830                         ZeroOrOneBooleanContent)) {
3831             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
3832             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
3833             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
3834           }
3835         }
3836         if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
3837           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
3838           if (Op0.getValueType().bitsGT(VT))
3839             Op0 = DAG.getNode(ISD::AND, dl, VT,
3840                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
3841                           DAG.getConstant(1, dl, VT));
3842           else if (Op0.getValueType().bitsLT(VT))
3843             Op0 = DAG.getNode(ISD::AND, dl, VT,
3844                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
3845                         DAG.getConstant(1, dl, VT));
3846 
3847           return DAG.getSetCC(dl, VT, Op0,
3848                               DAG.getConstant(0, dl, Op0.getValueType()),
3849                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3850         }
3851         if (Op0.getOpcode() == ISD::AssertZext &&
3852             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
3853           return DAG.getSetCC(dl, VT, Op0,
3854                               DAG.getConstant(0, dl, Op0.getValueType()),
3855                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3856       }
3857     }
3858 
3859     // Given:
3860     //   icmp eq/ne (urem %x, %y), 0
3861     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
3862     //   icmp eq/ne %x, 0
3863     if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
3864         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3865       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
3866       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
3867       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
3868         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
3869     }
3870 
3871     if (SDValue V =
3872             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
3873       return V;
3874   }
3875 
3876   // These simplifications apply to splat vectors as well.
3877   // TODO: Handle more splat vector cases.
3878   if (auto *N1C = isConstOrConstSplat(N1)) {
3879     const APInt &C1 = N1C->getAPIntValue();
3880 
3881     APInt MinVal, MaxVal;
3882     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
3883     if (ISD::isSignedIntSetCC(Cond)) {
3884       MinVal = APInt::getSignedMinValue(OperandBitSize);
3885       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
3886     } else {
3887       MinVal = APInt::getMinValue(OperandBitSize);
3888       MaxVal = APInt::getMaxValue(OperandBitSize);
3889     }
3890 
3891     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
3892     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
3893       // X >= MIN --> true
3894       if (C1 == MinVal)
3895         return DAG.getBoolConstant(true, dl, VT, OpVT);
3896 
3897       if (!VT.isVector()) { // TODO: Support this for vectors.
3898         // X >= C0 --> X > (C0 - 1)
3899         APInt C = C1 - 1;
3900         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
3901         if ((DCI.isBeforeLegalizeOps() ||
3902              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3903             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3904                                   isLegalICmpImmediate(C.getSExtValue())))) {
3905           return DAG.getSetCC(dl, VT, N0,
3906                               DAG.getConstant(C, dl, N1.getValueType()),
3907                               NewCC);
3908         }
3909       }
3910     }
3911 
3912     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
3913       // X <= MAX --> true
3914       if (C1 == MaxVal)
3915         return DAG.getBoolConstant(true, dl, VT, OpVT);
3916 
3917       // X <= C0 --> X < (C0 + 1)
3918       if (!VT.isVector()) { // TODO: Support this for vectors.
3919         APInt C = C1 + 1;
3920         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
3921         if ((DCI.isBeforeLegalizeOps() ||
3922              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3923             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3924                                   isLegalICmpImmediate(C.getSExtValue())))) {
3925           return DAG.getSetCC(dl, VT, N0,
3926                               DAG.getConstant(C, dl, N1.getValueType()),
3927                               NewCC);
3928         }
3929       }
3930     }
3931 
3932     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
3933       if (C1 == MinVal)
3934         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
3935 
3936       // TODO: Support this for vectors after legalize ops.
3937       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3938         // Canonicalize setlt X, Max --> setne X, Max
3939         if (C1 == MaxVal)
3940           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3941 
3942         // If we have setult X, 1, turn it into seteq X, 0
3943         if (C1 == MinVal+1)
3944           return DAG.getSetCC(dl, VT, N0,
3945                               DAG.getConstant(MinVal, dl, N0.getValueType()),
3946                               ISD::SETEQ);
3947       }
3948     }
3949 
3950     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
3951       if (C1 == MaxVal)
3952         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
3953 
3954       // TODO: Support this for vectors after legalize ops.
3955       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3956         // Canonicalize setgt X, Min --> setne X, Min
3957         if (C1 == MinVal)
3958           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3959 
3960         // If we have setugt X, Max-1, turn it into seteq X, Max
3961         if (C1 == MaxVal-1)
3962           return DAG.getSetCC(dl, VT, N0,
3963                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
3964                               ISD::SETEQ);
3965       }
3966     }
3967 
3968     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
3969       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
3970       if (C1.isNullValue())
3971         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
3972                 VT, N0, N1, Cond, DCI, dl))
3973           return CC;
3974 
3975       // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
3976       // For example, when high 32-bits of i64 X are known clear:
3977       // all bits clear: (X | (Y<<32)) ==  0 --> (X | Y) ==  0
3978       // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
3979       bool CmpZero = N1C->getAPIntValue().isNullValue();
3980       bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
3981       if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
3982         // Match or(lo,shl(hi,bw/2)) pattern.
        // Returns true if V is a concatenation of two half-width values,
        // i.e. or(lo, shl(hi, bw/2)) in either operand order, and sets Lo to
        // the unshifted (low) half and Hi to the pre-shift (high) half.
        auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
          unsigned EltBits = V.getScalarValueSizeInBits();
          // Only an OR of even element width can be split into equal halves.
          if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
            return false;
          SDValue LHS = V.getOperand(0);
          SDValue RHS = V.getOperand(1);
          APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
          // Unshifted element must have zero upperbits.
          if (RHS.getOpcode() == ISD::SHL &&
              isa<ConstantSDNode>(RHS.getOperand(1)) &&
              RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
              DAG.MaskedValueIsZero(LHS, HiBits)) {
            Lo = LHS;
            Hi = RHS.getOperand(0);
            return true;
          }
          // Commuted form: the shift is on the LHS and the unshifted (zero
          // upper bits) element is on the RHS.
          if (LHS.getOpcode() == ISD::SHL &&
              isa<ConstantSDNode>(LHS.getOperand(1)) &&
              LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
              DAG.MaskedValueIsZero(RHS, HiBits)) {
            Lo = RHS;
            Hi = LHS.getOperand(0);
            return true;
          }
          return false;
        };
4009 
        // Builds the narrowed comparison on the two halves of a concat:
        //   CmpZero:   (Lo | (Hi & LoMask)) ==/!= 0
        //   otherwise: (Lo & (Hi & LoMask)) ==/!= LoMask
        // where LoMask covers the low half bits. Hi is masked because only
        // its low half carries the pre-shift value.
        auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
          unsigned EltBits = N0.getScalarValueSizeInBits();
          unsigned HalfBits = EltBits / 2;
          APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
          // ~HiBits == mask of the low half; doubles as the all-low-bits-set
          // RHS constant in the non-CmpZero (compare-to-all-ones) case.
          SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
          SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
          SDValue NewN0 =
              DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
          SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
          return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
        };
4021 
4022         SDValue Lo, Hi;
4023         if (IsConcat(N0, Lo, Hi))
4024           return MergeConcat(Lo, Hi);
4025 
4026         if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
4027           SDValue Lo0, Lo1, Hi0, Hi1;
4028           if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
4029               IsConcat(N0.getOperand(1), Lo1, Hi1)) {
4030             return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
4031                                DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
4032           }
4033         }
4034       }
4035     }
4036 
4037     // If we have "setcc X, C0", check to see if we can shrink the immediate
4038     // by changing cc.
4039     // TODO: Support this for vectors after legalize ops.
4040     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4041       // SETUGT X, SINTMAX  -> SETLT X, 0
4042       // SETUGE X, SINTMIN -> SETLT X, 0
4043       if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
4044           (Cond == ISD::SETUGE && C1.isMinSignedValue()))
4045         return DAG.getSetCC(dl, VT, N0,
4046                             DAG.getConstant(0, dl, N1.getValueType()),
4047                             ISD::SETLT);
4048 
4049       // SETULT X, SINTMIN  -> SETGT X, -1
4050       // SETULE X, SINTMAX  -> SETGT X, -1
4051       if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
4052           (Cond == ISD::SETULE && C1.isMaxSignedValue()))
4053         return DAG.getSetCC(dl, VT, N0,
4054                             DAG.getAllOnesConstant(dl, N1.getValueType()),
4055                             ISD::SETGT);
4056     }
4057   }
4058 
4059   // Back to non-vector simplifications.
4060   // TODO: Can we do these for vector splats?
4061   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4062     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4063     const APInt &C1 = N1C->getAPIntValue();
4064     EVT ShValTy = N0.getValueType();
4065 
4066     // Fold bit comparisons when we can. This will result in an
4067     // incorrect value when boolean false is negative one, unless
4068     // the bitsize is 1 in which case the false value is the same
4069     // in practice regardless of the representation.
4070     if ((VT.getSizeInBits() == 1 ||
4071          getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4072         (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4073         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
4074         N0.getOpcode() == ISD::AND) {
4075       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4076         EVT ShiftTy =
4077             getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
4078         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
4079           // Perform the xform if the AND RHS is a single bit.
4080           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
4081           if (AndRHS->getAPIntValue().isPowerOf2() &&
4082               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
4083             return DAG.getNode(ISD::TRUNCATE, dl, VT,
4084                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4085                                            DAG.getConstant(ShCt, dl, ShiftTy)));
4086           }
4087         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
4088           // (X & 8) == 8  -->  (X & 8) >> 3
4089           // Perform the xform if C1 is a single bit.
4090           unsigned ShCt = C1.logBase2();
4091           if (C1.isPowerOf2() &&
4092               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
4093             return DAG.getNode(ISD::TRUNCATE, dl, VT,
4094                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4095                                            DAG.getConstant(ShCt, dl, ShiftTy)));
4096           }
4097         }
4098       }
4099     }
4100 
4101     if (C1.getMinSignedBits() <= 64 &&
4102         !isLegalICmpImmediate(C1.getSExtValue())) {
4103       EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
4104       // (X & -256) == 256 -> (X >> 8) == 1
4105       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4106           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
4107         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4108           const APInt &AndRHSC = AndRHS->getAPIntValue();
4109           if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
4110             unsigned ShiftBits = AndRHSC.countTrailingZeros();
4111             if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
4112               SDValue Shift =
4113                 DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
4114                             DAG.getConstant(ShiftBits, dl, ShiftTy));
4115               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
4116               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
4117             }
4118           }
4119         }
4120       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
4121                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
4122         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
4123         // X <  0x100000000 -> (X >> 32) <  1
4124         // X >= 0x100000000 -> (X >> 32) >= 1
4125         // X <= 0x0ffffffff -> (X >> 32) <  1
4126         // X >  0x0ffffffff -> (X >> 32) >= 1
4127         unsigned ShiftBits;
4128         APInt NewC = C1;
4129         ISD::CondCode NewCond = Cond;
4130         if (AdjOne) {
4131           ShiftBits = C1.countTrailingOnes();
4132           NewC = NewC + 1;
4133           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4134         } else {
4135           ShiftBits = C1.countTrailingZeros();
4136         }
4137         NewC.lshrInPlace(ShiftBits);
4138         if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
4139             isLegalICmpImmediate(NewC.getSExtValue()) &&
4140             !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
4141           SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4142                                       DAG.getConstant(ShiftBits, dl, ShiftTy));
4143           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
4144           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
4145         }
4146       }
4147     }
4148   }
4149 
4150   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
4151     auto *CFP = cast<ConstantFPSDNode>(N1);
4152     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
4153 
4154     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
4155     // constant if knowing that the operand is non-nan is enough.  We prefer to
4156     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
4157     // materialize 0.0.
4158     if (Cond == ISD::SETO || Cond == ISD::SETUO)
4159       return DAG.getSetCC(dl, VT, N0, N0, Cond);
4160 
4161     // setcc (fneg x), C -> setcc swap(pred) x, -C
4162     if (N0.getOpcode() == ISD::FNEG) {
4163       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
4164       if (DCI.isBeforeLegalizeOps() ||
4165           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
4166         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
4167         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
4168       }
4169     }
4170 
4171     // If the condition is not legal, see if we can find an equivalent one
4172     // which is legal.
4173     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
4174       // If the comparison was an awkward floating-point == or != and one of
4175       // the comparison operands is infinity or negative infinity, convert the
4176       // condition to a less-awkward <= or >=.
4177       if (CFP->getValueAPF().isInfinity()) {
4178         bool IsNegInf = CFP->getValueAPF().isNegative();
4179         ISD::CondCode NewCond = ISD::SETCC_INVALID;
4180         switch (Cond) {
4181         case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
4182         case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
4183         case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
4184         case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
4185         default: break;
4186         }
4187         if (NewCond != ISD::SETCC_INVALID &&
4188             isCondCodeLegal(NewCond, N0.getSimpleValueType()))
4189           return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4190       }
4191     }
4192   }
4193 
4194   if (N0 == N1) {
4195     // The sext(setcc()) => setcc() optimization relies on the appropriate
4196     // constant being emitted.
4197     assert(!N0.getValueType().isInteger() &&
4198            "Integer types should be handled by FoldSetCC");
4199 
4200     bool EqTrue = ISD::isTrueWhenEqual(Cond);
4201     unsigned UOF = ISD::getUnorderedFlavor(Cond);
4202     if (UOF == 2) // FP operators that are undefined on NaNs.
4203       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4204     if (UOF == unsigned(EqTrue))
4205       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4206     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
4207     // if it is not already.
4208     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
4209     if (NewCond != Cond &&
4210         (DCI.isBeforeLegalizeOps() ||
4211                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
4212       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4213   }
4214 
4215   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4216       N0.getValueType().isInteger()) {
4217     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
4218         N0.getOpcode() == ISD::XOR) {
4219       // Simplify (X+Y) == (X+Z) -->  Y == Z
4220       if (N0.getOpcode() == N1.getOpcode()) {
4221         if (N0.getOperand(0) == N1.getOperand(0))
4222           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
4223         if (N0.getOperand(1) == N1.getOperand(1))
4224           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
4225         if (isCommutativeBinOp(N0.getOpcode())) {
4226           // If X op Y == Y op X, try other combinations.
4227           if (N0.getOperand(0) == N1.getOperand(1))
4228             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
4229                                 Cond);
4230           if (N0.getOperand(1) == N1.getOperand(0))
4231             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
4232                                 Cond);
4233         }
4234       }
4235 
4236       // If RHS is a legal immediate value for a compare instruction, we need
4237       // to be careful about increasing register pressure needlessly.
4238       bool LegalRHSImm = false;
4239 
4240       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
4241         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4242           // Turn (X+C1) == C2 --> X == C2-C1
4243           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
4244             return DAG.getSetCC(dl, VT, N0.getOperand(0),
4245                                 DAG.getConstant(RHSC->getAPIntValue()-
4246                                                 LHSR->getAPIntValue(),
4247                                 dl, N0.getValueType()), Cond);
4248           }
4249 
4250           // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
4251           if (N0.getOpcode() == ISD::XOR)
4252             // If we know that all of the inverted bits are zero, don't bother
4253             // performing the inversion.
4254             if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
4255               return
4256                 DAG.getSetCC(dl, VT, N0.getOperand(0),
4257                              DAG.getConstant(LHSR->getAPIntValue() ^
4258                                                RHSC->getAPIntValue(),
4259                                              dl, N0.getValueType()),
4260                              Cond);
4261         }
4262 
4263         // Turn (C1-X) == C2 --> X == C1-C2
4264         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
4265           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
4266             return
4267               DAG.getSetCC(dl, VT, N0.getOperand(1),
4268                            DAG.getConstant(SUBC->getAPIntValue() -
4269                                              RHSC->getAPIntValue(),
4270                                            dl, N0.getValueType()),
4271                            Cond);
4272           }
4273         }
4274 
4275         // Could RHSC fold directly into a compare?
4276         if (RHSC->getValueType(0).getSizeInBits() <= 64)
4277           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
4278       }
4279 
4280       // (X+Y) == X --> Y == 0 and similar folds.
4281       // Don't do this if X is an immediate that can fold into a cmp
4282       // instruction and X+Y has other uses. It could be an induction variable
4283       // chain, and the transform would increase register pressure.
4284       if (!LegalRHSImm || N0.hasOneUse())
4285         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
4286           return V;
4287     }
4288 
4289     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
4290         N1.getOpcode() == ISD::XOR)
4291       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
4292         return V;
4293 
4294     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
4295       return V;
4296   }
4297 
4298   // Fold remainder of division by a constant.
4299   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
4300       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4301     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4302 
4303     // When division is cheap or optimizing for minimum size,
4304     // fall through to DIVREM creation by skipping this fold.
4305     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
4306       if (N0.getOpcode() == ISD::UREM) {
4307         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
4308           return Folded;
4309       } else if (N0.getOpcode() == ISD::SREM) {
4310         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
4311           return Folded;
4312       }
4313     }
4314   }
4315 
4316   // Fold away ALL boolean setcc's.
4317   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
4318     SDValue Temp;
4319     switch (Cond) {
4320     default: llvm_unreachable("Unknown integer setcc!");
4321     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
4322       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4323       N0 = DAG.getNOT(dl, Temp, OpVT);
4324       if (!DCI.isCalledByLegalizer())
4325         DCI.AddToWorklist(Temp.getNode());
4326       break;
4327     case ISD::SETNE:  // X != Y   -->  (X^Y)
4328       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4329       break;
4330     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
4331     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
4332       Temp = DAG.getNOT(dl, N0, OpVT);
4333       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
4334       if (!DCI.isCalledByLegalizer())
4335         DCI.AddToWorklist(Temp.getNode());
4336       break;
4337     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
4338     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
4339       Temp = DAG.getNOT(dl, N1, OpVT);
4340       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
4341       if (!DCI.isCalledByLegalizer())
4342         DCI.AddToWorklist(Temp.getNode());
4343       break;
4344     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
4345     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
4346       Temp = DAG.getNOT(dl, N0, OpVT);
4347       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
4348       if (!DCI.isCalledByLegalizer())
4349         DCI.AddToWorklist(Temp.getNode());
4350       break;
4351     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
4352     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
4353       Temp = DAG.getNOT(dl, N1, OpVT);
4354       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
4355       break;
4356     }
4357     if (VT.getScalarType() != MVT::i1) {
4358       if (!DCI.isCalledByLegalizer())
4359         DCI.AddToWorklist(N0.getNode());
4360       // FIXME: If running after legalize, we probably can't do this.
4361       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
4362       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
4363     }
4364     return N0;
4365   }
4366 
4367   // Could not fold it.
4368   return SDValue();
4369 }
4370 
4371 /// Returns true (and the GlobalValue and the offset) if the node is a
4372 /// GlobalAddress + offset.
4373 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
4374                                     int64_t &Offset) const {
4375 
4376   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
4377 
4378   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
4379     GA = GASD->getGlobal();
4380     Offset += GASD->getOffset();
4381     return true;
4382   }
4383 
4384   if (N->getOpcode() == ISD::ADD) {
4385     SDValue N1 = N->getOperand(0);
4386     SDValue N2 = N->getOperand(1);
4387     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
4388       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
4389         Offset += V->getSExtValue();
4390         return true;
4391       }
4392     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
4393       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
4394         Offset += V->getSExtValue();
4395         return true;
4396       }
4397     }
4398   }
4399 
4400   return false;
4401 }
4402 
4403 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
4404                                           DAGCombinerInfo &DCI) const {
4405   // Default implementation: no optimization.
4406   return SDValue();
4407 }
4408 
4409 //===----------------------------------------------------------------------===//
4410 //  Inline Assembler Implementation Methods
4411 //===----------------------------------------------------------------------===//
4412 
4413 TargetLowering::ConstraintType
4414 TargetLowering::getConstraintType(StringRef Constraint) const {
4415   unsigned S = Constraint.size();
4416 
4417   if (S == 1) {
4418     switch (Constraint[0]) {
4419     default: break;
4420     case 'r':
4421       return C_RegisterClass;
4422     case 'm': // memory
4423     case 'o': // offsetable
4424     case 'V': // not offsetable
4425       return C_Memory;
4426     case 'n': // Simple Integer
4427     case 'E': // Floating Point Constant
4428     case 'F': // Floating Point Constant
4429       return C_Immediate;
4430     case 'i': // Simple Integer or Relocatable Constant
4431     case 's': // Relocatable Constant
4432     case 'p': // Address.
4433     case 'X': // Allow ANY value.
4434     case 'I': // Target registers.
4435     case 'J':
4436     case 'K':
4437     case 'L':
4438     case 'M':
4439     case 'N':
4440     case 'O':
4441     case 'P':
4442     case '<':
4443     case '>':
4444       return C_Other;
4445     }
4446   }
4447 
4448   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
4449     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
4450       return C_Memory;
4451     return C_Register;
4452   }
4453   return C_Unknown;
4454 }
4455 
4456 /// Try to replace an X constraint, which matches anything, with another that
4457 /// has more specific requirements based on the type of the corresponding
4458 /// operand.
4459 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4460   if (ConstraintVT.isInteger())
4461     return "r";
4462   if (ConstraintVT.isFloatingPoint())
4463     return "f"; // works for many targets
4464   return nullptr;
4465 }
4466 
4467 SDValue TargetLowering::LowerAsmOutputForConstraint(
4468     SDValue &Chain, SDValue &Flag, const SDLoc &DL,
4469     const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
4470   return SDValue();
4471 }
4472 
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
///
/// This generic implementation handles the constraint letters 'X', 'i', 'n'
/// and 's' by matching a global address, block address or integer constant,
/// possibly buried under a chain of constant-offset ISD::ADD/ISD::SUB nodes.
/// Targets override this for their own constraint letters.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled here.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':     // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock ||
        Op.getOpcode() == ISD::TargetBlockAddress) {
      Ops.push_back(Op);
      return;
    }
    LLVM_FALLTHROUGH;
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    GlobalAddressSDNode *GA;
    ConstantSDNode *C;
    BlockAddressSDNode *BA;
    // Running byte offset accumulated while walking through ADD/SUB nodes.
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementptr is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (1) {
      // 'i'/'s'/'X' accept a relocatable symbol plus the accumulated offset;
      // 'n' requires a plain integer, so globals are rejected for it.
      if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0),
                                                 Offset + GA->getOffset()));
        return;
      }
      // 's' requires a relocatable value, so bare constants are rejected.
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      if ((BA = dyn_cast<BlockAddressSDNode>(Op)) && ConstraintLetter != 'n') {
        Ops.push_back(DAG.getTargetBlockAddress(
            BA->getBlockAddress(), BA->getValueType(0),
            Offset + BA->getOffset(), BA->getTargetFlags()));
        return;
      }
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        // Fold the constant into the running offset and keep descending
        // toward the symbol/constant at the bottom of the expression.
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      return;
    }
    break;
  }
  }
}
4552 
4553 std::pair<unsigned, const TargetRegisterClass *>
4554 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
4555                                              StringRef Constraint,
4556                                              MVT VT) const {
4557   if (Constraint.empty() || Constraint[0] != '{')
4558     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
4559   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
4560 
4561   // Remove the braces from around the name.
4562   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
4563 
4564   std::pair<unsigned, const TargetRegisterClass *> R =
4565       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
4566 
4567   // Figure out which register class contains this reg.
4568   for (const TargetRegisterClass *RC : RI->regclasses()) {
4569     // If none of the value types for this register class are valid, we
4570     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
4571     if (!isLegalRC(*RI, *RC))
4572       continue;
4573 
4574     for (const MCPhysReg &PR : *RC) {
4575       if (RegName.equals_lower(RI->getRegAsmName(PR))) {
4576         std::pair<unsigned, const TargetRegisterClass *> S =
4577             std::make_pair(PR, RC);
4578 
4579         // If this register class has the requested value type, return it,
4580         // otherwise keep searching and return the first class found
4581         // if no other is found which explicitly has the requested type.
4582         if (RI->isTypeLegalForClass(*RC, VT))
4583           return S;
4584         if (!R.second)
4585           R = S;
4586       }
4587     }
4588   }
4589 
4590   return R;
4591 }
4592 
4593 //===----------------------------------------------------------------------===//
4594 // Constraint Selection.
4595 
4596 /// Return true of this is an input operand that is a matching constraint like
4597 /// "4".
4598 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
4599   assert(!ConstraintCode.empty() && "No known constraint!");
4600   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
4601 }
4602 
4603 /// If this is an input matching constraint, this method returns the output
4604 /// operand it matches.
4605 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
4606   assert(!ConstraintCode.empty() && "No known constraint!");
4607   return atoi(ConstraintCode.c_str());
4608 }
4609 
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
///
/// The work is done in three passes: (1) canonicalize each parsed constraint
/// and compute its value type, (2) if multiple alternatives exist, pick the
/// best-weighted alternative for all operands, and (3) sanity-check tied
/// (matching) operands. report_fatal_error is raised on malformed asm.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        // Multi-output asm returns a struct; each output gets one element.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType());
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Derive ConstraintVT from the IR type of the operand value, when there
    // is one (clobbers and direct outputs handled above have none).
    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands are passed through memory; use the pointee type.
        llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        // Pointers become integers of pointer width in their address space.
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One unmatchable operand invalidates the whole alternative.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
           cIndex != eIndex; ++cIndex) {
        AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
        if (cInfo.Type == InlineAsm::isClobber)
          continue;
        cInfo.selectAlternative(bestMAIndex);
      }
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        // Differing VTs are tolerable only if both are integers backed by
        // the same register class.
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
4798 
4799 /// Return an integer indicating how general CT is.
4800 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
4801   switch (CT) {
4802   case TargetLowering::C_Immediate:
4803   case TargetLowering::C_Other:
4804   case TargetLowering::C_Unknown:
4805     return 0;
4806   case TargetLowering::C_Register:
4807     return 1;
4808   case TargetLowering::C_RegisterClass:
4809     return 2;
4810   case TargetLowering::C_Memory:
4811     return 3;
4812   }
4813   llvm_unreachable("Invalid constraint type");
4814 }
4815 
4816 /// Examine constraint type and operand type and determine a weight value.
4817 /// This object must already have been set up with the operand type
4818 /// and the current alternative constraint selected.
4819 TargetLowering::ConstraintWeight
4820   TargetLowering::getMultipleConstraintMatchWeight(
4821     AsmOperandInfo &info, int maIndex) const {
4822   InlineAsm::ConstraintCodeVector *rCodes;
4823   if (maIndex >= (int)info.multipleAlternatives.size())
4824     rCodes = &info.Codes;
4825   else
4826     rCodes = &info.multipleAlternatives[maIndex].Codes;
4827   ConstraintWeight BestWeight = CW_Invalid;
4828 
4829   // Loop over the options, keeping track of the most general one.
4830   for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
4831     ConstraintWeight weight =
4832       getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
4833     if (weight > BestWeight)
4834       BestWeight = weight;
4835   }
4836 
4837   return BestWeight;
4838 }
4839 
4840 /// Examine constraint type and operand type and determine a weight value.
4841 /// This object must already have been set up with the operand type
4842 /// and the current alternative constraint selected.
4843 TargetLowering::ConstraintWeight
4844   TargetLowering::getSingleConstraintMatchWeight(
4845     AsmOperandInfo &info, const char *constraint) const {
4846   ConstraintWeight weight = CW_Invalid;
4847   Value *CallOperandVal = info.CallOperandVal;
4848     // If we don't have a value, we can't do a match,
4849     // but allow it at the lowest weight.
4850   if (!CallOperandVal)
4851     return CW_Default;
4852   // Look at the constraint type.
4853   switch (*constraint) {
4854     case 'i': // immediate integer.
4855     case 'n': // immediate integer with a known value.
4856       if (isa<ConstantInt>(CallOperandVal))
4857         weight = CW_Constant;
4858       break;
4859     case 's': // non-explicit intregal immediate.
4860       if (isa<GlobalValue>(CallOperandVal))
4861         weight = CW_Constant;
4862       break;
4863     case 'E': // immediate float if host format.
4864     case 'F': // immediate float.
4865       if (isa<ConstantFP>(CallOperandVal))
4866         weight = CW_Constant;
4867       break;
4868     case '<': // memory operand with autodecrement.
4869     case '>': // memory operand with autoincrement.
4870     case 'm': // memory operand.
4871     case 'o': // offsettable memory operand
4872     case 'V': // non-offsettable memory operand
4873       weight = CW_Memory;
4874       break;
4875     case 'r': // general register.
4876     case 'g': // general register, memory operand or immediate integer.
4877               // note: Clang converts "g" to "imr".
4878       if (CallOperandVal->getType()->isIntegerTy())
4879         weight = CW_Register;
4880       break;
4881     case 'X': // any operand.
4882   default:
4883     weight = CW_Default;
4884     break;
4885   }
4886   return weight;
4887 }
4888 
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
///    Other         -> immediates and magic values
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it.  The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory.  Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it.  This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present.  This prefers
///     'm' over 'r', for example.
///
/// On return, OpInfo.ConstraintCode and OpInfo.ConstraintType are set to the
/// chosen option.
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
                             const TargetLowering &TLI,
                             SDValue Op, SelectionDAG *DAG) {
  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
  unsigned BestIdx = 0;
  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
  int BestGenerality = -1;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
    TargetLowering::ConstraintType CType =
      TLI.getConstraintType(OpInfo.Codes[i]);

    // Indirect 'other' or 'immediate' constraints are not allowed.
    if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
                               CType == TargetLowering::C_Register ||
                               CType == TargetLowering::C_RegisterClass))
      continue;

    // If this is an 'other' or 'immediate' constraint, see if the operand is
    // valid for it. For example, on X86 we might have an 'rI' constraint. If
    // the operand is an integer in the range [0..31] we want to use I (saving a
    // load of a register), otherwise we must use 'r'.
    if ((CType == TargetLowering::C_Other ||
         CType == TargetLowering::C_Immediate) && Op.getNode()) {
      assert(OpInfo.Codes[i].size() == 1 &&
             "Unhandled multi-letter 'other' constraint");
      std::vector<SDValue> ResultOps;
      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
                                       ResultOps, *DAG);
      if (!ResultOps.empty()) {
        // The operand lowered successfully for this option: heuristic #1
        // says take it immediately.
        BestType = CType;
        BestIdx = i;
        break;
      }
    }

    // Things with matching constraints can only be registers, per gcc
    // documentation.  This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    // This constraint letter is more general than the previous one, use it.
    int Generality = getConstraintGenerality(CType);
    if (Generality > BestGenerality) {
      BestType = CType;
      BestIdx = i;
      BestGenerality = Generality;
    }
  }

  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
  OpInfo.ConstraintType = BestType;
}
4963 
4964 /// Determines the constraint code and constraint type to use for the specific
4965 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
4966 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
4967                                             SDValue Op,
4968                                             SelectionDAG *DAG) const {
4969   assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
4970 
4971   // Single-letter constraints ('r') are very common.
4972   if (OpInfo.Codes.size() == 1) {
4973     OpInfo.ConstraintCode = OpInfo.Codes[0];
4974     OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4975   } else {
4976     ChooseConstraint(OpInfo, *this, Op, DAG);
4977   }
4978 
4979   // 'X' matches anything.
4980   if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
4981     // Labels and constants are handled elsewhere ('X' is the only thing
4982     // that matches labels).  For Functions, the type here is the type of
4983     // the result, which is not what we want to look at; leave them alone.
4984     Value *v = OpInfo.CallOperandVal;
4985     if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
4986       OpInfo.CallOperandVal = v;
4987       return;
4988     }
4989 
4990     if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
4991       return;
4992 
4993     // Otherwise, try to resolve it to something we know about by looking at
4994     // the actual operand type.
4995     if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
4996       OpInfo.ConstraintCode = Repl;
4997       OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4998     }
4999   }
5000 }
5001 
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
///
/// Because the division is exact, X sdiv (C * 2^k) can be rewritten as
/// (X >>exact k) * C^-1 mod 2^BW. Handles scalar, fixed-vector
/// (BUILD_VECTOR) and scalable-vector (SPLAT_VECTOR) divisors; any node
/// created for the shift is recorded in \p Created. Returns a null SDValue
/// if any divisor element is zero or non-constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // Set if any element needed a pre-shift to make the divisor odd.
  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per-element worker: split the divisor into (odd factor) * 2^Shift and
  // compute the factor's multiplicative inverse.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Reassemble the per-element shifts/factors into operands matching the
  // divisor's form (build-vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
5069 
5070 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
5071                               SelectionDAG &DAG,
5072                               SmallVectorImpl<SDNode *> &Created) const {
5073   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5074   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5075   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
5076     return SDValue(N, 0); // Lower SDIV as SDIV
5077   return SDValue();
5078 }
5079 
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// \param N  The SDIV node to expand.
/// \param IsAfterLegalization  Whether DAG operation legalization has run;
///        restricts which MULHS/SMUL_LOHI forms may be used.
/// \param Created  Out-list of all newly created nodes (for the combiner).
/// \returns the replacement expression, or SDValue() if the fold cannot be
/// performed (illegal type, zero divisor element, no usable high-multiply).
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT; // Wider type to multiply in when VT itself is not legal.

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // For one constant divisor element, compute the magic multiplier, the
  // numerator add/sub factor (-1/0/+1), the post-shift amount, and the
  // sign-bit fixup mask (all-ones normally, zero for divisors of +1/-1).
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is UB; decline and leave it to be handled elsewhere.
    if (C->isNullValue())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    APInt::ms magics = Divisor.magic();
    int NumeratorFactor = 0;
    int ShiftMask = -1; // All-ones: keep the sign-bit addend at the end.

    if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.m = 0;
      magics.s = 0;
      ShiftMask = 0; // No sign-bit fixup needed for +1/-1.
    } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Materialize the per-element values in the same shape (build vector,
  // splat, or scalar) as the divisor operand.
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      // Keep only the high half of the double-width product.
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1); // Result 1 is the high half.
    }
    return SDValue(); // No MULHS or equivalent available.
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
5227 
5228 /// Given an ISD::UDIV node expressing a divide by constant,
5229 /// return a DAG expression to select that will generate the same value by
5230 /// multiplying by a magic number.
5231 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
5232 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
5233                                   bool IsAfterLegalization,
5234                                   SmallVectorImpl<SDNode *> &Created) const {
5235   SDLoc dl(N);
5236   EVT VT = N->getValueType(0);
5237   EVT SVT = VT.getScalarType();
5238   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5239   EVT ShSVT = ShVT.getScalarType();
5240   unsigned EltBits = VT.getScalarSizeInBits();
5241   EVT MulVT;
5242 
5243   // Check to see if we can do this.
5244   // FIXME: We should be more aggressive here.
5245   if (!isTypeLegal(VT)) {
5246     // Limit this to simple scalars for now.
5247     if (VT.isVector() || !VT.isSimple())
5248       return SDValue();
5249 
5250     // If this type will be promoted to a large enough type with a legal
5251     // multiply operation, we can go ahead and do this transform.
5252     if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
5253       return SDValue();
5254 
5255     MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
5256     if (MulVT.getSizeInBits() < (2 * EltBits) ||
5257         !isOperationLegal(ISD::MUL, MulVT))
5258       return SDValue();
5259   }
5260 
5261   bool UseNPQ = false;
5262   SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5263 
5264   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
5265     if (C->isNullValue())
5266       return false;
5267     // FIXME: We should use a narrower constant when the upper
5268     // bits are known to be zero.
5269     const APInt& Divisor = C->getAPIntValue();
5270     APInt::mu magics = Divisor.magicu();
5271     unsigned PreShift = 0, PostShift = 0;
5272 
5273     // If the divisor is even, we can avoid using the expensive fixup by
5274     // shifting the divided value upfront.
5275     if (magics.a != 0 && !Divisor[0]) {
5276       PreShift = Divisor.countTrailingZeros();
5277       // Get magic number for the shifted divisor.
5278       magics = Divisor.lshr(PreShift).magicu(PreShift);
5279       assert(magics.a == 0 && "Should use cheap fixup now");
5280     }
5281 
5282     APInt Magic = magics.m;
5283 
5284     unsigned SelNPQ;
5285     if (magics.a == 0 || Divisor.isOneValue()) {
5286       assert(magics.s < Divisor.getBitWidth() &&
5287              "We shouldn't generate an undefined shift!");
5288       PostShift = magics.s;
5289       SelNPQ = false;
5290     } else {
5291       PostShift = magics.s - 1;
5292       SelNPQ = true;
5293     }
5294 
5295     PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
5296     MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
5297     NPQFactors.push_back(
5298         DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5299                                : APInt::getNullValue(EltBits),
5300                         dl, SVT));
5301     PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
5302     UseNPQ |= SelNPQ;
5303     return true;
5304   };
5305 
5306   SDValue N0 = N->getOperand(0);
5307   SDValue N1 = N->getOperand(1);
5308 
5309   // Collect the shifts/magic values from each element.
5310   if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
5311     return SDValue();
5312 
5313   SDValue PreShift, PostShift, MagicFactor, NPQFactor;
5314   if (N1.getOpcode() == ISD::BUILD_VECTOR) {
5315     PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
5316     MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
5317     NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
5318     PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
5319   } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
5320     assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
5321            NPQFactors.size() == 1 && PostShifts.size() == 1 &&
5322            "Expected matchUnaryPredicate to return one for scalable vectors");
5323     PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
5324     MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
5325     NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
5326     PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
5327   } else {
5328     assert(isa<ConstantSDNode>(N1) && "Expected a constant");
5329     PreShift = PreShifts[0];
5330     MagicFactor = MagicFactors[0];
5331     PostShift = PostShifts[0];
5332   }
5333 
5334   SDValue Q = N0;
5335   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
5336   Created.push_back(Q.getNode());
5337 
5338   // FIXME: We should support doing a MUL in a wider type.
5339   auto GetMULHU = [&](SDValue X, SDValue Y) {
5340     // If the type isn't legal, use a wider mul of the the type calculated
5341     // earlier.
5342     if (!isTypeLegal(VT)) {
5343       X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
5344       Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
5345       Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
5346       Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
5347                       DAG.getShiftAmountConstant(EltBits, MulVT, dl));
5348       return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
5349     }
5350 
5351     if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
5352       return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
5353     if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
5354       SDValue LoHi =
5355           DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
5356       return SDValue(LoHi.getNode(), 1);
5357     }
5358     return SDValue(); // No mulhu or equivalent
5359   };
5360 
5361   // Multiply the numerator (operand 0) by the magic value.
5362   Q = GetMULHU(Q, MagicFactor);
5363   if (!Q)
5364     return SDValue();
5365 
5366   Created.push_back(Q.getNode());
5367 
5368   if (UseNPQ) {
5369     SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
5370     Created.push_back(NPQ.getNode());
5371 
5372     // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5373     // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
5374     if (VT.isVector())
5375       NPQ = GetMULHU(NPQ, NPQFactor);
5376     else
5377       NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
5378 
5379     Created.push_back(NPQ.getNode());
5380 
5381     Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
5382     Created.push_back(Q.getNode());
5383   }
5384 
5385   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
5386   Created.push_back(Q.getNode());
5387 
5388   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5389 
5390   SDValue One = DAG.getConstant(1, dl, VT);
5391   SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
5392   return DAG.getSelect(dl, VT, IsOne, N0, Q);
5393 }
5394 
5395 /// If all values in Values that *don't* match the predicate are same 'splat'
5396 /// value, then replace all values with that splat value.
5397 /// Else, if AlternativeReplacement was provided, then replace all values that
5398 /// do match predicate with AlternativeReplacement value.
5399 static void
5400 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
5401                           std::function<bool(SDValue)> Predicate,
5402                           SDValue AlternativeReplacement = SDValue()) {
5403   SDValue Replacement;
5404   // Is there a value for which the Predicate does *NOT* match? What is it?
5405   auto SplatValue = llvm::find_if_not(Values, Predicate);
5406   if (SplatValue != Values.end()) {
5407     // Does Values consist only of SplatValue's and values matching Predicate?
5408     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
5409           return Value == *SplatValue || Predicate(Value);
5410         })) // Then we shall replace values matching predicate with SplatValue.
5411       Replacement = *SplatValue;
5412   }
5413   if (!Replacement) {
5414     // Oops, we did not find the "baseline" splat value.
5415     if (!AlternativeReplacement)
5416       return; // Nothing to do.
5417     // Let's replace with provided value then.
5418     Replacement = AlternativeReplacement;
5419   }
5420   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
5421 }
5422 
5423 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
5424 /// where the divisor is constant and the comparison target is zero,
5425 /// return a DAG expression that will generate the same comparison result
5426 /// using only multiplications, additions and shifts/rotations.
5427 /// Ref: "Hacker's Delight" 10-17.
5428 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
5429                                         SDValue CompTargetNode,
5430                                         ISD::CondCode Cond,
5431                                         DAGCombinerInfo &DCI,
5432                                         const SDLoc &DL) const {
5433   SmallVector<SDNode *, 5> Built;
5434   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5435                                          DCI, DL, Built)) {
5436     for (SDNode *N : Built)
5437       DCI.AddToWorklist(N);
5438     return Folded;
5439   }
5440 
5441   return SDValue();
5442 }
5443 
/// Worker for buildUREMEqFold: performs the lane analysis and builds the
/// replacement node sequence, appending every newly created node to
/// \p Created (without touching the combiner worklist). Returns the folded
/// setcc expression, or SDValue() if the fold is impossible or judged
/// unprofitable (e.g. all divisors are powers of two).
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // Per-lane analysis flags, accumulated by the lambda below.
  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;

  // Analyze one (divisor, comparison-target) lane pair and compute its
  // P (multiplier), K (rotate amount), and Q (comparison bound) constants.
  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isNullValue())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isNullValue();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isNullValue())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOneValue();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(R))
      Q -= 1;

    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K to bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a power-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, KVal, QVal;
  if (VT.isVector()) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0's.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // Comparing with a non-zero target requires subtracting it from the LHS
  // first (skip if every such lane is tautological anyway).
  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    SDNodeFlags Flags;
    Flags.setExact(true);
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
    Created.push_back(Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
  Created.push_back(TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops -- legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
                                              DL, SETCCVT, SETCCVT);
    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
                       Replacement, NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the NOTE above the VSELECT check.
  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
                       TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
5660 
5661 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
5662 /// where the divisor is constant and the comparison target is zero,
5663 /// return a DAG expression that will generate the same comparison result
5664 /// using only multiplications, additions and shifts/rotations.
5665 /// Ref: "Hacker's Delight" 10-17.
5666 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
5667                                         SDValue CompTargetNode,
5668                                         ISD::CondCode Cond,
5669                                         DAGCombinerInfo &DCI,
5670                                         const SDLoc &DL) const {
5671   SmallVector<SDNode *, 7> Built;
5672   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5673                                          DCI, DL, Built)) {
5674     assert(Built.size() <= 7 && "Max size prediction failed.");
5675     for (SDNode *N : Built)
5676       DCI.AddToWorklist(N);
5677     return Folded;
5678   }
5679 
5680   return SDValue();
5681 }
5682 
5683 SDValue
5684 TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
5685                                   SDValue CompTargetNode, ISD::CondCode Cond,
5686                                   DAGCombinerInfo &DCI, const SDLoc &DL,
5687                                   SmallVectorImpl<SDNode *> &Created) const {
5688   // Fold:
5689   //   (seteq/ne (srem N, D), 0)
5690   // To:
5691   //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
5692   //
5693   // - D must be constant, with D = D0 * 2^K where D0 is odd
5694   // - P is the multiplicative inverse of D0 modulo 2^W
5695   // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
5696   // - Q = floor((2 * A) / (2^K))
5697   // where W is the width of the common type of N and D.
5698   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5699          "Only applicable for (in)equality comparisons.");
5700 
5701   SelectionDAG &DAG = DCI.DAG;
5702 
5703   EVT VT = REMNode.getValueType();
5704   EVT SVT = VT.getScalarType();
5705   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
5706   EVT ShSVT = ShVT.getScalarType();
5707 
5708   // If we are after ops legalization, and MUL is unavailable, we can not
5709   // proceed.
5710   if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
5711     return SDValue();
5712 
5713   // TODO: Could support comparing with non-zero too.
5714   ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
5715   if (!CompTarget || !CompTarget->isNullValue())
5716     return SDValue();
5717 
5718   bool HadIntMinDivisor = false;
5719   bool HadOneDivisor = false;
5720   bool AllDivisorsAreOnes = true;
5721   bool HadEvenDivisor = false;
5722   bool NeedToApplyOffset = false;
5723   bool AllDivisorsArePowerOfTwo = true;
5724   SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
5725 
5726   auto BuildSREMPattern = [&](ConstantSDNode *C) {
5727     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
5728     if (C->isNullValue())
5729       return false;
5730 
5731     // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
5732 
5733     // WARNING: this fold is only valid for positive divisors!
5734     APInt D = C->getAPIntValue();
5735     if (D.isNegative())
5736       D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`
5737 
5738     HadIntMinDivisor |= D.isMinSignedValue();
5739 
5740     // If all divisors are ones, we will prefer to avoid the fold.
5741     HadOneDivisor |= D.isOneValue();
5742     AllDivisorsAreOnes &= D.isOneValue();
5743 
5744     // Decompose D into D0 * 2^K
5745     unsigned K = D.countTrailingZeros();
5746     assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
5747     APInt D0 = D.lshr(K);
5748 
5749     if (!D.isMinSignedValue()) {
5750       // D is even if it has trailing zeros; unless it's INT_MIN, in which case
5751       // we don't care about this lane in this fold, we'll special-handle it.
5752       HadEvenDivisor |= (K != 0);
5753     }
5754 
5755     // D is a power-of-two if D0 is one. This includes INT_MIN.
5756     // If all divisors are power-of-two, we will prefer to avoid the fold.
5757     AllDivisorsArePowerOfTwo &= D0.isOneValue();
5758 
5759     // P = inv(D0, 2^W)
5760     // 2^W requires W + 1 bits, so we have to extend and then truncate.
5761     unsigned W = D.getBitWidth();
5762     APInt P = D0.zext(W + 1)
5763                   .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
5764                   .trunc(W);
5765     assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
5766     assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
5767 
5768     // A = floor((2^(W - 1) - 1) / D0) & -2^K
5769     APInt A = APInt::getSignedMaxValue(W).udiv(D0);
5770     A.clearLowBits(K);
5771 
5772     if (!D.isMinSignedValue()) {
5773       // If divisor INT_MIN, then we don't care about this lane in this fold,
5774       // we'll special-handle it.
5775       NeedToApplyOffset |= A != 0;
5776     }
5777 
5778     // Q = floor((2 * A) / (2^K))
5779     APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
5780 
5781     assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
5782            "We are expecting that A is always less than all-ones for SVT");
5783     assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
5784            "We are expecting that K is always less than all-ones for ShSVT");
5785 
5786     // If the divisor is 1 the result can be constant-folded. Likewise, we
5787     // don't care about INT_MIN lanes, those can be set to undef if appropriate.
5788     if (D.isOneValue()) {
5789       // Set P, A and K to a bogus values so we can try to splat them.
5790       P = 0;
5791       A = -1;
5792       K = -1;
5793 
5794       // x ?% 1 == 0  <-->  true  <-->  x u<= -1
5795       Q = -1;
5796     }
5797 
5798     PAmts.push_back(DAG.getConstant(P, DL, SVT));
5799     AAmts.push_back(DAG.getConstant(A, DL, SVT));
5800     KAmts.push_back(
5801         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
5802     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
5803     return true;
5804   };
5805 
5806   SDValue N = REMNode.getOperand(0);
5807   SDValue D = REMNode.getOperand(1);
5808 
5809   // Collect the values from each element.
5810   if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
5811     return SDValue();
5812 
5813   // If this is a srem by a one, avoid the fold since it can be constant-folded.
5814   if (AllDivisorsAreOnes)
5815     return SDValue();
5816 
5817   // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
5818   // since it can be best implemented as a bit test.
5819   if (AllDivisorsArePowerOfTwo)
5820     return SDValue();
5821 
5822   SDValue PVal, AVal, KVal, QVal;
5823   if (D.getOpcode() == ISD::BUILD_VECTOR) {
5824     if (HadOneDivisor) {
5825       // Try to turn PAmts into a splat, since we don't care about the values
5826       // that are currently '0'. If we can't, just keep '0'`s.
5827       turnVectorIntoSplatVector(PAmts, isNullConstant);
5828       // Try to turn AAmts into a splat, since we don't care about the
5829       // values that are currently '-1'. If we can't, change them to '0'`s.
5830       turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
5831                                 DAG.getConstant(0, DL, SVT));
5832       // Try to turn KAmts into a splat, since we don't care about the values
5833       // that are currently '-1'. If we can't, change them to '0'`s.
5834       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
5835                                 DAG.getConstant(0, DL, ShSVT));
5836     }
5837 
5838     PVal = DAG.getBuildVector(VT, DL, PAmts);
5839     AVal = DAG.getBuildVector(VT, DL, AAmts);
5840     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
5841     QVal = DAG.getBuildVector(VT, DL, QAmts);
5842   } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
5843     assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
5844            QAmts.size() == 1 &&
5845            "Expected matchUnaryPredicate to return one element for scalable "
5846            "vectors");
5847     PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
5848     AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
5849     KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
5850     QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
5851   } else {
5852     assert(isa<ConstantSDNode>(D) && "Expected a constant");
5853     PVal = PAmts[0];
5854     AVal = AAmts[0];
5855     KVal = KAmts[0];
5856     QVal = QAmts[0];
5857   }
5858 
5859   // (mul N, P)
5860   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
5861   Created.push_back(Op0.getNode());
5862 
5863   if (NeedToApplyOffset) {
5864     // We need ADD to do this.
5865     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
5866       return SDValue();
5867 
5868     // (add (mul N, P), A)
5869     Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
5870     Created.push_back(Op0.getNode());
5871   }
5872 
5873   // Rotate right only if any divisor was even. We avoid rotates for all-odd
5874   // divisors as a performance improvement, since rotating by 0 is a no-op.
5875   if (HadEvenDivisor) {
5876     // We need ROTR to do this.
5877     if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
5878       return SDValue();
5879     SDNodeFlags Flags;
5880     Flags.setExact(true);
5881     // SREM: (rotr (add (mul N, P), A), K)
5882     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
5883     Created.push_back(Op0.getNode());
5884   }
5885 
5886   // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
5887   SDValue Fold =
5888       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
5889                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
5890 
5891   // If we didn't have lanes with INT_MIN divisor, then we're done.
5892   if (!HadIntMinDivisor)
5893     return Fold;
5894 
5895   // That fold is only valid for positive divisors. Which effectively means,
5896   // it is invalid for INT_MIN divisors. So if we have such a lane,
5897   // we must fix-up results for said lanes.
5898   assert(VT.isVector() && "Can/should only get here for vectors.");
5899 
5900   // NOTE: we avoid letting illegal types through even if we're before legalize
5901   // ops – legalization has a hard time producing good code for the code that
5902   // follows.
5903   if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
5904       !isOperationLegalOrCustom(ISD::AND, VT) ||
5905       !isOperationLegalOrCustom(Cond, VT) ||
5906       !isOperationLegalOrCustom(ISD::VSELECT, VT))
5907     return SDValue();
5908 
5909   Created.push_back(Fold.getNode());
5910 
5911   SDValue IntMin = DAG.getConstant(
5912       APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
5913   SDValue IntMax = DAG.getConstant(
5914       APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
5915   SDValue Zero =
5916       DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
5917 
5918   // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
5919   SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
5920   Created.push_back(DivisorIsIntMin.getNode());
5921 
5922   // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
5923   SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
5924   Created.push_back(Masked.getNode());
5925   SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
5926   Created.push_back(MaskedIsZero.getNode());
5927 
5928   // To produce final result we need to blend 2 vectors: 'SetCC' and
5929   // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
5930   // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
5931   // constant-folded, select can get lowered to a shuffle with constant mask.
5932   SDValue Blended =
5933       DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);
5934 
5935   return Blended;
5936 }
5937 
5938 bool TargetLowering::
5939 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
5940   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
5941     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
5942                                 "be a constant integer");
5943     return true;
5944   }
5945 
5946   return false;
5947 }
5948 
5949 SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
5950                                          const DenormalMode &Mode) const {
5951   SDLoc DL(Op);
5952   EVT VT = Op.getValueType();
5953   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5954   SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
5955   // Testing it with denormal inputs to avoid wrong estimate.
5956   if (Mode.Input == DenormalMode::IEEE) {
5957     // This is specifically a check for the handling of denormal inputs,
5958     // not the result.
5959 
5960     // Test = fabs(X) < SmallestNormal
5961     const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
5962     APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
5963     SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
5964     SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
5965     return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
5966   }
5967   // Test = X == 0.0
5968   return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
5969 }
5970 
/// Try to build the negation of \p Op without making the expression more
/// expensive, recursing into operands up to SelectionDAG::MaxRecursionDepth.
/// On success returns the negated value and sets \p Cost to how the new
/// expression compares to the original (Cheaper or Neutral); on failure
/// returns an empty SDValue and leaves \p Cost untouched.
///
/// \param LegalOps  if true, refuse folds that would create operations that
///                  are not legal/custom for the target (post-legalization).
/// \param OptForSize passed to isFPImmLegal when judging negated constants.
/// \param Depth     current recursion depth; callers normally pass 0.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Drop a speculatively-built negation that ended up unused (e.g. the more
  // expensive of two candidate negations below).
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even if it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    // Legal either as a whole constant vector, or element-wise as negated
    // FP immediates.
    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Negate each element; undef lanes pass through unchanged.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    // -(X + Y) is only (-X - Y) / (-Y - X) when signed zeros don't matter.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    // Negating either operand negates the product/quotient; pick the
    // cheaper operand to negate.
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    // -(X*Y + Z) needs no-signed-zeros to rewrite as fma(-X, Y, -Z) etc.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes with these unary ops:
    // -ext(X) == ext(-X), -sin(X) == sin(-X).
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    // -round(X) == round(-X); preserve the truncation flag operand.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  }

  return SDValue();
}
6239 
6240 //===----------------------------------------------------------------------===//
6241 // Legalization Utilities
6242 //===----------------------------------------------------------------------===//
6243 
/// Expand a MUL, UMUL_LOHI or SMUL_LOHI of type \p VT into operations on the
/// half-width type \p HiLoVT.
///
/// On success, appends the half-width pieces of the product to \p Result:
/// two values (Lo, Hi) for ISD::MUL, four values for the *MUL_LOHI opcodes
/// (the low product's Lo/Hi followed by the high product's Lo/Hi).
/// \p LL/\p LH/\p RL/\p RH optionally supply pre-split halves of the
/// operands; they must be all set or all null.
/// Returns false if the target lacks the half-width multiply forms needed.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Which half-width "high part" multiplies can we use? Always allowed when
  // the caller forces expansion, otherwise only if legal/custom.
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Emit a half-width multiply returning both halves of L*R, using whichever
  // of {S,U}MUL_LOHI or MUL+MULH{S,U} the target provides.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Split the operands into low halves by truncation if the caller didn't
  // supply them.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  // Fast path: if both operands fit in the low half (zero-extended), one
  // unsigned half-width multiply gives the full product.
  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  // Fast path: for sign-extended operands, a single signed half-width
  // multiply suffices (scalar MUL only).
  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeNumSignBits(LHS) > InnerBitSize &&
      DAG.ComputeNumSignBits(RHS) > InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
    // FIXME getShiftAmountTy does not always return a sensible result when VT
    // is an illegal type, and so the type may be too small to fit the shift
    // amount. Override it with i32. The shift will have to be legalized.
    ShiftAmountTy = MVT::i32;
  }
  SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);

  // Split the operands into high halves by shift+truncate if the caller
  // didn't supply them.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // General case: schoolbook multiplication of the four half-products,
  // starting with the low*low partial product.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // Only the low VT-width bits are needed: fold the cross products'
    // low halves into Hi.
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Adding the second cross product can carry; propagate it with the
  // glue-based ADDC/ADDE pair if available, else with ADDCARRY.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // High*high partial product (signed for SMUL_LOHI), plus the carry from
  // the middle additions.
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  // For the signed case, correct the unsigned cross products when a high
  // half was negative (select the subtracted form).
  if (Opcode == ISD::SMUL_LOHI) {
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  // Emit the high product as two half-width pieces.
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
6425 
6426 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
6427                                SelectionDAG &DAG, MulExpansionKind Kind,
6428                                SDValue LL, SDValue LH, SDValue RL,
6429                                SDValue RH) const {
6430   SmallVector<SDValue, 2> Result;
6431   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
6432                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
6433                            DAG, Kind, LL, LH, RL, RH);
6434   if (Ok) {
6435     assert(Result.size() == 2);
6436     Lo = Result[0];
6437     Hi = Result[1];
6438   }
6439   return Ok;
6440 }
6441 
6442 // Check that (every element of) Z is undef or not an exact multiple of BW.
6443 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
6444   return ISD::matchUnaryPredicate(
6445       Z,
6446       [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
6447       true);
6448 }
6449 
/// Expand an ISD::FSHL/FSHR funnel-shift node into shifts, masks and OR (or
/// into the opposite-direction funnel shift when that one is supported).
/// On success stores the replacement value in \p Result and returns true;
/// returns false when \p Node is a vector op and the pieces the expansion
/// needs are not legal/custom for the target.
bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
                                       SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);

  // For vectors, only expand if every operation we would emit is available;
  // scalar expansion is attempted unconditionally.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return false;

  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2); // shift amount

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();

  // If a funnel shift in the other direction is more supported, use it.
  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
      // fshl X, Y, Z -> fshr X, Y, -Z
      // fshr X, Y, Z -> fshl X, Y, -Z
      SDValue Zero = DAG.getConstant(0, DL, ShVT);
      // NOTE(review): this SUB is created with result type VT while Zero and
      // Z have type ShVT; that only type-checks when ShVT == VT. Confirm
      // whether ShVT was intended as the node type here.
      Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
    } else {
      // The amount may be zero mod BW, so pre-shift by one and invert the
      // amount instead of negating it (~Z == BW-1-Z mod BW for pow-2 BW):
      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
      SDValue One = DAG.getConstant(1, DL, ShVT);
      if (IsFSHL) {
        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
      } else {
        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
      }
      Z = DAG.getNOT(DL, Z, ShVT);
    }
    Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
    return true;
  }

  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  } else {
    // The amount may be zero, in which case BW - (Z % BW) would be an
    // out-of-range shift; split the complementary shift into a fixed
    // one-bit shift followed by a shift by BW-1-(Z%BW):
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
    }
  }
  Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
  return true;
}
6536 
6537 // TODO: Merge with expandFunnelShift.
6538 bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
6539                                SDValue &Result, SelectionDAG &DAG) const {
6540   EVT VT = Node->getValueType(0);
6541   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6542   bool IsLeft = Node->getOpcode() == ISD::ROTL;
6543   SDValue Op0 = Node->getOperand(0);
6544   SDValue Op1 = Node->getOperand(1);
6545   SDLoc DL(SDValue(Node, 0));
6546 
6547   EVT ShVT = Op1.getValueType();
6548   SDValue Zero = DAG.getConstant(0, DL, ShVT);
6549 
6550   // If a rotate in the other direction is supported, use it.
6551   unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
6552   if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
6553     SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
6554     Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
6555     return true;
6556   }
6557 
6558   if (!AllowVectorOps && VT.isVector() &&
6559       (!isOperationLegalOrCustom(ISD::SHL, VT) ||
6560        !isOperationLegalOrCustom(ISD::SRL, VT) ||
6561        !isOperationLegalOrCustom(ISD::SUB, VT) ||
6562        !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
6563        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
6564     return false;
6565 
6566   unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
6567   unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
6568   SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
6569   SDValue ShVal;
6570   SDValue HsVal;
6571   if (isPowerOf2_32(EltSizeInBits)) {
6572     // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
6573     // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
6574     SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
6575     SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
6576     ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
6577     SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
6578     HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
6579   } else {
6580     // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
6581     // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
6582     SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
6583     SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
6584     ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
6585     SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
6586     SDValue One = DAG.getConstant(1, DL, ShVT);
6587     HsVal =
6588         DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
6589   }
6590   Result = DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
6591   return true;
6592 }
6593 
// Expand SHL_PARTS/SRL_PARTS/SRA_PARTS (a shift of a value split across two
// registers) into a funnel shift plus a plain shift, then select the correct
// pieces for shift amounts that are >= the width of one part. \p Lo and \p Hi
// receive the low/high parts of the shifted result.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0);
  SDValue ShOpHi = Node->getOperand(1);
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Tmp1 is the fill value for an out-of-range shift: the smeared sign of the
  // high part for SRA_PARTS, zero otherwise.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  // Tmp2 is the part that receives bits funneled across the part boundary;
  // Tmp3 is the part produced by a plain shift of a single input.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  // Cond is true exactly when the VTBits bit of the shift amount is set.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
6645 
// Expand FP_TO_SINT in terms of integer bit operations by decomposing the
// source float's bit pattern into sign, exponent and mantissa fields.
// Returns false if no expansion was performed.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 binary32 layout: bit 31 sign, bits 30-23 exponent (bias 127),
  // bits 22-0 mantissa.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as an integer so the fields can be picked apart.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent: ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Smear the sign bit across the whole value with an arithmetic shift,
  // giving 0 for positive inputs and all-ones for negative inputs.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Mantissa with the implicit leading 1 bit restored.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the mantissa by the exponent: shift left when the exponent exceeds
  // the mantissa width, otherwise shift right to drop fractional bits.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones and
  // is a no-op when Sign is zero.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |Src| < 1, so the result is 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
6716 
// Expand FP_TO_UINT in terms of FP_TO_SINT by compensating for the
// destination's sign bit when the source can exceed the signed range.
// Returns false if no expansion was performed; on success Result (and, for
// strict FP ops, Chain) are set.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  // Note: convertFromAPInt leaves APF holding the FP value of SignMask
  // (e.g. 2^63 for i64); that value is reused below as the pivot Cst.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  // Sel is true when Src is below the destination signmask, i.e. when a
  // plain FP_TO_SINT of Src would already be in range.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    // Use a signaling comparison under strict FP so exception behavior for
    // NaN inputs is preserved, and thread the chain through it.
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    // XOR with the signmask adds 0x800... without risking signed overflow.
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
6818 
// Expand i64 -> f64 UINT_TO_FP by assembling the high and low 32-bit halves
// of the source directly into doubles via exponent-bias bit tricks, then
// combining them with one FSUB and one FADD.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when rounding mode is set
  // to round toward negative infinity which will produce -0.0. So disable under
  // strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt.  This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will produce
  // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
  //
  // The magic bit patterns, interpreted as doubles:
  //   0x4330000000000000 = 2^52 (the low 32 bits fit in its mantissa)
  //   0x4530000000000000 = 2^84 (the high 32 bits, shifted up, fit likewise)
  //   0x4530000000100000 = 2^84 + 2^52 (both biases, removed in one FSUB)
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

  // As doubles: LoFlt == 2^52 + Lo and HiFlt == 2^84 + Hi * 2^32, so
  // (HiFlt - (2^84 + 2^52)) + LoFlt == Hi * 2^32 + Lo == Src.
  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  SDValue HiSub =
      DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  return true;
}
6869 
6870 SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
6871                                               SelectionDAG &DAG) const {
6872   SDLoc dl(Node);
6873   unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
6874     ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
6875   EVT VT = Node->getValueType(0);
6876 
6877   if (VT.isScalableVector())
6878     report_fatal_error(
6879         "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
6880 
6881   if (isOperationLegalOrCustom(NewOp, VT)) {
6882     SDValue Quiet0 = Node->getOperand(0);
6883     SDValue Quiet1 = Node->getOperand(1);
6884 
6885     if (!Node->getFlags().hasNoNaNs()) {
6886       // Insert canonicalizes if it's possible we need to quiet to get correct
6887       // sNaN behavior.
6888       if (!DAG.isKnownNeverSNaN(Quiet0)) {
6889         Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
6890                              Node->getFlags());
6891       }
6892       if (!DAG.isKnownNeverSNaN(Quiet1)) {
6893         Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
6894                              Node->getFlags());
6895       }
6896     }
6897 
6898     return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
6899   }
6900 
6901   // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
6902   // instead if there are no NaNs.
6903   if (Node->getFlags().hasNoNaNs()) {
6904     unsigned IEEE2018Op =
6905         Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
6906     if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
6907       return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
6908                          Node->getOperand(1), Node->getFlags());
6909     }
6910   }
6911 
6912   // If none of the above worked, but there are no NaNs, then expand to
6913   // a compare/select sequence.  This is required for correctness since
6914   // InstCombine might have canonicalized a fcmp+select sequence to a
6915   // FMINNUM/FMAXNUM node.  If we were to fall through to the default
6916   // expansion to libcall, we might introduce a link-time dependency
6917   // on libm into a file that originally did not have one.
6918   if (Node->getFlags().hasNoNaNs()) {
6919     ISD::CondCode Pred =
6920         Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
6921     SDValue Op1 = Node->getOperand(0);
6922     SDValue Op2 = Node->getOperand(1);
6923     SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
6924     // Copy FMF flags, but always set the no-signed-zeros flag
6925     // as this is implied by the FMINNUM/FMAXNUM semantics.
6926     SDNodeFlags Flags = Node->getFlags();
6927     Flags.setNoSignedZeros(true);
6928     SelCC->setFlags(Flags);
6929     return SelCC;
6930   }
6931 
6932   return SDValue();
6933 }
6934 
// Expand CTPOP with the classic SWAR bit-counting sequence: accumulate
// counts in 2-bit, then 4-bit fields, then sum the per-byte counts with a
// multiply by 0x0101... followed by a shift. Returns false if unsupported.
bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
                                 SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  // The MUL is only needed for the final byte-sum step, so it is not required
  // for 8-bit elements.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return false;

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  // Each mask repeats an 8-bit pattern across the full element width.
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
  SDValue Mask01 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);
  // v = (v * 0x01010101...) >> (Len - 8)
  // The multiply sums all byte counts into the top byte; skip it when the
  // element is a single byte and already holds the final count.
  if (Len > 8)
    Op =
        DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
                    DAG.getConstant(Len - 8, dl, ShVT));

  Result = Op;
  return true;
}
6994 
6995 bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
6996                                 SelectionDAG &DAG) const {
6997   SDLoc dl(Node);
6998   EVT VT = Node->getValueType(0);
6999   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7000   SDValue Op = Node->getOperand(0);
7001   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
7002 
7003   // If the non-ZERO_UNDEF version is supported we can use that instead.
7004   if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
7005       isOperationLegalOrCustom(ISD::CTLZ, VT)) {
7006     Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
7007     return true;
7008   }
7009 
7010   // If the ZERO_UNDEF version is supported use that and handle the zero case.
7011   if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
7012     EVT SetCCVT =
7013         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7014     SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
7015     SDValue Zero = DAG.getConstant(0, dl, VT);
7016     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
7017     Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
7018                          DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
7019     return true;
7020   }
7021 
7022   // Only expand vector types if we have the appropriate vector bit operations.
7023   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
7024                         !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
7025                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
7026                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
7027     return false;
7028 
7029   // for now, we do this:
7030   // x = x | (x >> 1);
7031   // x = x | (x >> 2);
7032   // ...
7033   // x = x | (x >>16);
7034   // x = x | (x >>32); // for 64-bit input
7035   // return popcount(~x);
7036   //
7037   // Ref: "Hacker's Delight" by Henry Warren
7038   for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
7039     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
7040     Op = DAG.getNode(ISD::OR, dl, VT, Op,
7041                      DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
7042   }
7043   Op = DAG.getNOT(dl, Op, VT);
7044   Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
7045   return true;
7046 }
7047 
7048 bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
7049                                 SelectionDAG &DAG) const {
7050   SDLoc dl(Node);
7051   EVT VT = Node->getValueType(0);
7052   SDValue Op = Node->getOperand(0);
7053   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
7054 
7055   // If the non-ZERO_UNDEF version is supported we can use that instead.
7056   if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
7057       isOperationLegalOrCustom(ISD::CTTZ, VT)) {
7058     Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
7059     return true;
7060   }
7061 
7062   // If the ZERO_UNDEF version is supported use that and handle the zero case.
7063   if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
7064     EVT SetCCVT =
7065         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7066     SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
7067     SDValue Zero = DAG.getConstant(0, dl, VT);
7068     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
7069     Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
7070                          DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
7071     return true;
7072   }
7073 
7074   // Only expand vector types if we have the appropriate vector bit operations.
7075   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
7076                         (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
7077                          !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
7078                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
7079                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
7080                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
7081     return false;
7082 
7083   // for now, we use: { return popcount(~x & (x - 1)); }
7084   // unless the target has ctlz but not ctpop, in which case we use:
7085   // { return 32 - nlz(~x & (x-1)); }
7086   // Ref: "Hacker's Delight" by Henry Warren
7087   SDValue Tmp = DAG.getNode(
7088       ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
7089       DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
7090 
7091   // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
7092   if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
7093     Result =
7094         DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
7095                     DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
7096     return true;
7097   }
7098 
7099   Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
7100   return true;
7101 }
7102 
7103 bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
7104                                SelectionDAG &DAG, bool IsNegative) const {
7105   SDLoc dl(N);
7106   EVT VT = N->getValueType(0);
7107   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7108   SDValue Op = N->getOperand(0);
7109 
7110   // abs(x) -> smax(x,sub(0,x))
7111   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
7112       isOperationLegal(ISD::SMAX, VT)) {
7113     SDValue Zero = DAG.getConstant(0, dl, VT);
7114     Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
7115                          DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
7116     return true;
7117   }
7118 
7119   // abs(x) -> umin(x,sub(0,x))
7120   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
7121       isOperationLegal(ISD::UMIN, VT)) {
7122     SDValue Zero = DAG.getConstant(0, dl, VT);
7123     Result = DAG.getNode(ISD::UMIN, dl, VT, Op,
7124                          DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
7125     return true;
7126   }
7127 
7128   // 0 - abs(x) -> smin(x, sub(0,x))
7129   if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
7130       isOperationLegal(ISD::SMIN, VT)) {
7131     SDValue Zero = DAG.getConstant(0, dl, VT);
7132     Result = DAG.getNode(ISD::SMIN, dl, VT, Op,
7133                          DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
7134     return true;
7135   }
7136 
7137   // Only expand vector types if we have the appropriate vector operations.
7138   if (VT.isVector() &&
7139       (!isOperationLegalOrCustom(ISD::SRA, VT) ||
7140        (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
7141        (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
7142        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
7143     return false;
7144 
7145   SDValue Shift =
7146       DAG.getNode(ISD::SRA, dl, VT, Op,
7147                   DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
7148   if (!IsNegative) {
7149     SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
7150     Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
7151   } else {
7152     // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
7153     SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
7154     Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
7155   }
7156   return true;
7157 }
7158 
// Expand BSWAP into shifts, masks and ORs that move each byte into its
// mirrored position. Returns a null SDValue for unsupported types.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  // The switch below needs a concrete MVT scalar type.
  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    // Tmp4..Tmp1 isolate one byte each of the result, from most to least
    // significant. The outer bytes need no AND because the shift itself
    // clears the remaining bits; the inner bytes are masked explicitly.
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(0xFF0000, dl, VT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    // Combine the pieces pairwise, then merge the pairs.
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    // Same scheme with eight bytes: Tmp8..Tmp1 isolate one result byte each,
    // from most to least significant.
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
                       DAG.getConstant(255ULL<<48, dl, VT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
                       DAG.getConstant(255ULL<<40, dl, VT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
                       DAG.getConstant(255ULL<<32, dl, VT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL<<8 , dl, VT));
    // Reduce the eight pieces with a balanced OR tree.
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
7216 
// Expand ISD::BITREVERSE. For byte-multiple power-of-2 widths this is a
// BSWAP followed by three in-byte swap steps (nibbles, bit-pairs, single
// bits); any other width falls back to a generic per-bit extract/shift/OR
// loop.
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
    APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
    APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
    APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

    // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }

  // Generic fallback: move bit I of the input to bit J = Sz-1-I of the
  // result, one bit per iteration. Shift the whole value so source bit I
  // lands at position J, isolate that single bit, and accumulate with OR.
  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));

    // Mask that keeps only the destination bit J.
    APInt Shift(Sz, 1);
    Shift <<= J;
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }

  return Tmp;
}
7281 
// Turn a vector load into scalar operations. When the element type is not
// byte-sized the vector is loaded as one wide integer and the elements are
// extracted with shift+mask; otherwise one (ext)load per element is emitted.
// Returns the rebuilt vector value and the new chain. Reports a fatal error
// for scalable vectors, which cannot be scalarized element-by-element here.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    // Integer type wide enough to hold the whole in-memory vector.
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    // Mask selecting a single element's bits after the shift below.
    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    // Extract each element: shift it down to bit 0, mask, truncate, and
    // re-extend according to the original load's extension kind.
    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 lives in the high bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
                                     LoadVT, SL, /*LegalTypes=*/false);
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: emit one scalar (ext)load per element, each at the
  // element's byte offset from the base pointer.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // Merge the per-element load chains and rebuild the result vector.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
7372 
// Turn a vector store into scalar operations: for non-byte-sized elements
// the value is packed into one wide integer (shift+OR) and stored once; for
// byte-sized elements one truncating store per element is emitted. Returns
// the resulting chain. Reports a fatal error for scalable vectors.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    // Integer wide enough to hold the packed in-memory representation.
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    // Extract each element, truncate to the memory element width, and OR it
    // into its bit position within the packed integer.
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 occupies the high bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    // Store the packed integer with the original store's memory attributes.
    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  // The per-element stores are independent; tie their chains together.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
7449 
// Expand an unaligned (unindexed) load. FP/vector loads are handled by a
// same-size integer load plus bitcast when that is legal, or by copying to an
// aligned stack slot with register-width integer loads/stores and reloading
// from there. Integer loads are split into two half-width loads that are
// shifted and ORed back together. Returns {result value, output chain}.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width chunks needed to cover the value (round up).
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    // Little-endian: the low half is at the base address.
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    // Big-endian: the high half is at the base address.
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts: (Hi << NumBits) | Lo
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
7601 
// Expand an unaligned (unindexed) store. FP/vector stores become a bitcast
// plus a (misaligned) integer store when that is legal, or a spill to an
// aligned stack slot that is then copied out with register-width integer
// loads/stores. Integer stores are split into two half-width truncating
// stores. Returns the resulting chain.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width chunks needed to cover the value (round up).
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount = DAG.getConstant(
      NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
  SDValue Lo = Val;
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts; endianness decides which half goes at the base
  // address and which at base + IncrementSize.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
7726 
// Return Addr advanced by the amount of memory one vector of type DataVT
// occupies. For compressed memory only the mask-active elements occupy
// memory, so the increment is popcount(Mask) * element-bytes; scalable
// vectors use a vscale-scaled constant; otherwise the plain store size.
SDValue
TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
                                       const SDLoc &DL, EVT DataVT,
                                       SelectionDAG &DAG,
                                       bool IsCompressedMemory) const {
  SDValue Increment;
  EVT AddrVT = Addr.getValueType();
  EVT MaskVT = Mask.getValueType();
  assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
         "Incompatible types of Data and Mask");
  if (IsCompressedMemory) {
    if (DataVT.isScalableVector())
      report_fatal_error(
          "Cannot currently handle compressed memory with scalable vectors");
    // Incrementing the pointer according to number of '1's in the mask.
    EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
    SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
    // Widen sub-i32 masks so CTPOP operates on a reasonable type.
    if (MaskIntVT.getSizeInBits() < 32) {
      MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
      MaskIntVT = MVT::i32;
    }

    // Count '1's with POPCNT.
    Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
    Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
    // Scale is an element size in bytes.
    SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
                                    AddrVT);
    Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
  } else if (DataVT.isScalableVector()) {
    // Scalable: increment by vscale * known-minimum store size.
    Increment = DAG.getVScale(DL, AddrVT,
                              APInt(AddrVT.getFixedSizeInBits(),
                                    DataVT.getStoreSize().getKnownMinSize()));
  } else
    Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);

  return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}
7765 
7766 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
7767                                        SDValue Idx,
7768                                        EVT VecVT,
7769                                        const SDLoc &dl) {
7770   if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
7771     return Idx;
7772 
7773   EVT IdxVT = Idx.getValueType();
7774   unsigned NElts = VecVT.getVectorMinNumElements();
7775   if (VecVT.isScalableVector()) {
7776     // If this is a constant index and we know the value is less than the
7777     // minimum number of elements then it's safe to return Idx.
7778     if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
7779       if (IdxCst->getZExtValue() < NElts)
7780         return Idx;
7781     SDValue VS =
7782         DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
7783     SDValue Sub =
7784         DAG.getNode(ISD::SUB, dl, IdxVT, VS, DAG.getConstant(1, dl, IdxVT));
7785     return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
7786   }
7787   if (isPowerOf2_32(NElts)) {
7788     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
7789     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
7790                        DAG.getConstant(Imm, dl, IdxVT));
7791   }
7792   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
7793                      DAG.getConstant(NElts - 1, dl, IdxVT));
7794 }
7795 
7796 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
7797                                                 SDValue VecPtr, EVT VecVT,
7798                                                 SDValue Index) const {
7799   SDLoc dl(Index);
7800   // Make sure the index type is big enough to compute in.
7801   Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
7802 
7803   EVT EltVT = VecVT.getVectorElementType();
7804 
7805   // Calculate the element offset and add it to the pointer.
7806   unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
7807   assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
7808          "Converting bits to bytes lost precision");
7809 
7810   Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
7811 
7812   EVT IdxVT = Index.getValueType();
7813 
7814   Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
7815                       DAG.getConstant(EltSize, dl, IdxVT));
7816   return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
7817 }
7818 
7819 //===----------------------------------------------------------------------===//
7820 // Implementation of Emulated TLS Model
7821 //===----------------------------------------------------------------------===//
7822 
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  // Build the single argument: the address of the "__emutls_v.<name>"
  // control variable, which must already exist in the module.
  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Lower the call to __emutls_get_address with the C calling convention;
  // its return value is the address of the TLS variable.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
7859 
7860 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
7861                                                 SelectionDAG &DAG) const {
7862   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
7863   if (!isCtlzFast())
7864     return SDValue();
7865   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7866   SDLoc dl(Op);
7867   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7868     if (C->isNullValue() && CC == ISD::SETEQ) {
7869       EVT VT = Op.getOperand(0).getValueType();
7870       SDValue Zext = Op.getOperand(0);
7871       if (VT.bitsLT(MVT::i32)) {
7872         VT = MVT::i32;
7873         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
7874       }
7875       unsigned Log2b = Log2_32(VT.getSizeInBits());
7876       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
7877       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
7878                                 DAG.getConstant(Log2b, dl, MVT::i32));
7879       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
7880     }
7881   }
7882   return SDValue();
7883 }
7884 
7885 // Convert redundant addressing modes (e.g. scaling is redundant
7886 // when accessing bytes).
7887 ISD::MemIndexType
7888 TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
7889                                       SDValue Offsets) const {
7890   bool IsScaledIndex =
7891       (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED);
7892   bool IsSignedIndex =
7893       (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
7894 
7895   // Scaling is unimportant for bytes, canonicalize to unscaled.
7896   if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
7897     IsScaledIndex = false;
7898     IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
7899   }
7900 
7901   return IndexType;
7902 }
7903 
7904 SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
7905   SDValue Op0 = Node->getOperand(0);
7906   SDValue Op1 = Node->getOperand(1);
7907   EVT VT = Op0.getValueType();
7908   unsigned Opcode = Node->getOpcode();
7909   SDLoc DL(Node);
7910 
7911   // umin(x,y) -> sub(x,usubsat(x,y))
7912   if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
7913       isOperationLegal(ISD::USUBSAT, VT)) {
7914     return DAG.getNode(ISD::SUB, DL, VT, Op0,
7915                        DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
7916   }
7917 
7918   // umax(x,y) -> add(x,usubsat(y,x))
7919   if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
7920       isOperationLegal(ISD::USUBSAT, VT)) {
7921     return DAG.getNode(ISD::ADD, DL, VT, Op0,
7922                        DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
7923   }
7924 
7925   // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
7926   ISD::CondCode CC;
7927   switch (Opcode) {
7928   default: llvm_unreachable("How did we get here?");
7929   case ISD::SMAX: CC = ISD::SETGT; break;
7930   case ISD::SMIN: CC = ISD::SETLT; break;
7931   case ISD::UMAX: CC = ISD::SETUGT; break;
7932   case ISD::UMIN: CC = ISD::SETULT; break;
7933   }
7934 
7935   // FIXME: Should really try to split the vector in case it's legal on a
7936   // subvector.
7937   if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
7938     return DAG.UnrollVectorOp(Node);
7939 
7940   SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
7941   return DAG.getSelect(DL, VT, Cond, Op0, Op1);
7942 }
7943 
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  // Expand [SU]{ADD,SUB}SAT. Fast paths use already-legal umax/umin; the
  // generic path computes the result with the corresponding overflow node
  // and substitutes the saturation constant when overflow occurred.
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map the saturating opcode to its overflow-reporting counterpart.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // Compute the raw sum/difference together with its overflow flag.
  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // With all-ones booleans the overflow flag sign-extends to a full mask,
      // so saturation to all-ones is a single OR.
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Dually, saturation to zero is an AND with the inverted mask.
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  // Signed case: an overflowed result with a negative raw value wrapped past
  // the maximum, and with a non-negative raw value wrapped past the minimum.
  // SatMax -> Overflow && SumDiff < 0
  // SatMin -> Overflow && SumDiff >= 0
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
  SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
  Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
8030 
8031 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
8032   unsigned Opcode = Node->getOpcode();
8033   bool IsSigned = Opcode == ISD::SSHLSAT;
8034   SDValue LHS = Node->getOperand(0);
8035   SDValue RHS = Node->getOperand(1);
8036   EVT VT = LHS.getValueType();
8037   SDLoc dl(Node);
8038 
8039   assert((Node->getOpcode() == ISD::SSHLSAT ||
8040           Node->getOpcode() == ISD::USHLSAT) &&
8041           "Expected a SHLSAT opcode");
8042   assert(VT == RHS.getValueType() && "Expected operands to be the same type");
8043   assert(VT.isInteger() && "Expected operands to be integers");
8044 
8045   // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
8046 
8047   unsigned BW = VT.getScalarSizeInBits();
8048   SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
8049   SDValue Orig =
8050       DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
8051 
8052   SDValue SatVal;
8053   if (IsSigned) {
8054     SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
8055     SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
8056     SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT),
8057                              SatMin, SatMax, ISD::SETLT);
8058   } else {
8059     SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
8060   }
8061   Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE);
8062 
8063   return Result;
8064 }
8065 
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  // Expand [SU]MULFIX[SAT]: a fixed-point multiply of two operands that share
  // a scale. The full product is obtained as a double-width value (Hi:Lo) and
  // shifted right by Scale; the saturating forms additionally clamp when the
  // discarded high bits show the product does not fit.
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // Scale-0 signed saturating multiply via SMULO: on overflow, clamp to
      // SatMax when the wrapped product is negative (true product was
      // positive) and SatMin otherwise.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // Scale-0 unsigned saturating multiply via UMULO: clamp to the
      // unsigned maximum on overflow.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    // Caller is expected to unroll; scalar elements may expand via libcall.
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With no fractional bits, Hi must equal the sign-extension of Lo;
    // anything else is overflow.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
8205 
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  // Expand [SU]DIVFIX[SAT] in the operand type by pre-shifting the operands
  // (upscale LHS and/or downscale RHS) so a plain division yields a result
  // already scaled by Scale. Returns SDValue() if the type lacks the headroom
  // for that, in which case the caller must widen first.
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Prefer to consume LHS headroom first; whatever Scale remains is taken
  // off the RHS (guaranteed safe by the check above).
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getConstant(LHSShift, dl, ShiftTy));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getConstant(RHSShift, dl, ShiftTy));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    // Quotient is negative exactly when the operand signs differ; apply the
    // round-toward-negative-infinity correction only then and only when the
    // remainder is nonzero.
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
8292 
8293 void TargetLowering::expandUADDSUBO(
8294     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
8295   SDLoc dl(Node);
8296   SDValue LHS = Node->getOperand(0);
8297   SDValue RHS = Node->getOperand(1);
8298   bool IsAdd = Node->getOpcode() == ISD::UADDO;
8299 
8300   // If ADD/SUBCARRY is legal, use that instead.
8301   unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
8302   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
8303     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
8304     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
8305                                     { LHS, RHS, CarryIn });
8306     Result = SDValue(NodeCarry.getNode(), 0);
8307     Overflow = SDValue(NodeCarry.getNode(), 1);
8308     return;
8309   }
8310 
8311   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
8312                             LHS.getValueType(), LHS, RHS);
8313 
8314   EVT ResultType = Node->getValueType(1);
8315   EVT SetCCType = getSetCCResultType(
8316       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
8317   ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
8318   SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
8319   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
8320 }
8321 
void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  // Expand S{ADD,SUB}O: compute the wrapped result, then derive the overflow
  // flag either by comparing against the saturating form (when legal) or from
  // sign-based reasoning on the operands and result.
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                            LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT OType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));

  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  // (Overflow occurred iff the saturated and wrapped results differ.)
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
    return;
  }

  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
  SDValue ConditionRHS =
      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);

  // XOR of the two conditions is true exactly when the invariant above is
  // violated, i.e. when signed overflow occurred.
  Overflow = DAG.getBoolExtOrTrunc(
      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
      ResultType, ResultType);
}
8361 
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  // Expand [SU]MULO (multiply with overflow flag). Strategies, in order:
  // power-of-two constant -> shift; MULH[SU]/[SU]MUL_LOHI -> high/low halves;
  // legal double-width type -> widen, multiply, split; otherwise a MUL_I*
  // libcall with pre-split double-width arguments. Returns false only when a
  // vector cannot be handled (caller must unroll).
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // Double-width type used by the widening and libcall strategies below.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                              VT.getVectorNumElements());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Per-signedness opcodes: { high-half mul, lo/hi pair mul, extension }.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Widen, multiply, then split the product into halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of low
      // part.
      unsigned LoSize = VT.getFixedSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
        HiLHS = DAG.getConstant(0, dl, VT);
        HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus has to be split to
    // two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half differs from the sign-extension of
    // the bottom half.
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff any bit of the top half is set.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
8505 
8506 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
8507   SDLoc dl(Node);
8508   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
8509   SDValue Op = Node->getOperand(0);
8510   EVT VT = Op.getValueType();
8511 
8512   if (VT.isScalableVector())
8513     report_fatal_error(
8514         "Expanding reductions for scalable vectors is undefined.");
8515 
8516   // Try to use a shuffle reduction for power of two vectors.
8517   if (VT.isPow2VectorType()) {
8518     while (VT.getVectorNumElements() > 1) {
8519       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
8520       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
8521         break;
8522 
8523       SDValue Lo, Hi;
8524       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
8525       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
8526       VT = HalfVT;
8527     }
8528   }
8529 
8530   EVT EltVT = VT.getVectorElementType();
8531   unsigned NumElts = VT.getVectorNumElements();
8532 
8533   SmallVector<SDValue, 8> Ops;
8534   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
8535 
8536   SDValue Res = Ops[0];
8537   for (unsigned i = 1; i < NumElts; i++)
8538     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
8539 
8540   // Result type may be wider than element type.
8541   if (EltVT != Node->getValueType(0))
8542     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
8543   return Res;
8544 }
8545 
8546 SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
8547   SDLoc dl(Node);
8548   SDValue AccOp = Node->getOperand(0);
8549   SDValue VecOp = Node->getOperand(1);
8550   SDNodeFlags Flags = Node->getFlags();
8551 
8552   EVT VT = VecOp.getValueType();
8553   EVT EltVT = VT.getVectorElementType();
8554 
8555   if (VT.isScalableVector())
8556     report_fatal_error(
8557         "Expanding reductions for scalable vectors is undefined.");
8558 
8559   unsigned NumElts = VT.getVectorNumElements();
8560 
8561   SmallVector<SDValue, 8> Ops;
8562   DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
8563 
8564   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
8565 
8566   SDValue Res = AccOp;
8567   for (unsigned i = 0; i < NumElts; i++)
8568     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
8569 
8570   return Res;
8571 }
8572 
8573 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
8574                                SelectionDAG &DAG) const {
8575   EVT VT = Node->getValueType(0);
8576   SDLoc dl(Node);
8577   bool isSigned = Node->getOpcode() == ISD::SREM;
8578   unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
8579   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
8580   SDValue Dividend = Node->getOperand(0);
8581   SDValue Divisor = Node->getOperand(1);
8582   if (isOperationLegalOrCustom(DivRemOpc, VT)) {
8583     SDVTList VTs = DAG.getVTList(VT, VT);
8584     Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
8585     return true;
8586   }
8587   if (isOperationLegalOrCustom(DivOpc, VT)) {
8588     // X % Y -> X-X/Y*Y
8589     SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
8590     SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
8591     Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
8592     return true;
8593   }
8594   return false;
8595 }
8596 
/// Expand FP_TO_[SU]INT_SAT (saturating float-to-int conversion) into a
/// sequence of clamps/compares/selects around a plain FP_TO_[SU]INT:
/// out-of-range inputs saturate to the min/max integer value of the
/// saturation width, and NaN produces zero.
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values. The bounds are computed at the saturation width
  // and then widened to the destination width (sign- or zero-extended to
  // match the signedness of the conversion).
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  // If either conversion reported opInexact, the integer bound is not
  // exactly representable in SrcVT, so the fminnum/fmaxnum clamp fast path
  // below would clamp against a rounded bound and cannot be used.
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN. SETUO is true iff either operand is
    // NaN, so comparing Src against itself detects NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
                           ISD::CondCode::SETUO);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
                           ISD::CondCode::SETULT);
  // If Src OGT MaxFloat, select MaxInt.
  Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
                           ISD::CondCode::SETOGT);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN (Src != Src in the unordered sense).
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
8699 
/// Expand ISD::VECTOR_SPLICE for scalable vectors by going through memory:
/// both source vectors are stored back-to-back in a stack temporary, and the
/// result is loaded from a byte offset computed from the splice immediate.
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  // Imm < 0 means "take -Imm trailing elements of V1"; Imm >= 0 means
  // "start the result at element Imm of V1" (see the scheme below).
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr

  // Non-ABI ("reduced") alignment for the stack temporary.
  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // The temporary holds both vectors back-to-back: 2 * VT's element count.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2). The byte offset of V2 is
  // vscale * (minimum store size of VT).
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  // NOTE(review): PtrInfo here still describes offset 0 of the slot while the
  // address is at a vscale-scaled offset — presumably relied upon to be
  // conservatively imprecise; confirm against alias-analysis expectations.
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  // If the requested trailing-element count can exceed V1's runtime length,
  // clamp the byte offset to V1's runtime size (vscale * min store size).
  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes = DAG.getVScale(
        DL, PtrVT,
        APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result: end of V1 minus the
  // trailing bytes taken from V1.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
8772 
/// Legalize the condition code of a setcc-style operation whose condition is
/// not legal for the operand type. Tries, in order: swapping the operands,
/// inverting the condition (optionally also swapping), and finally expanding
/// into two comparisons joined by AND/OR. Returns true when LHS/RHS/CC were
/// rewritten; NeedInvert tells the caller to invert the final result. Chain
/// is updated for strict (chained) comparisons.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // Strategy 1: swap the operands and use the operand-swapped condition.
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Strategy 2: invert the condition and have the caller invert the
    // result (NeedInvert). Swapping operands didn't work on its own.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    // Strategy 3: expand into two comparisons (CC1, CC2) combined with a
    // logical operation Opc (AND/OR).
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      // SETUO == (LHS SETUNE LHS) OR (RHS SETUNE RHS): either operand being
      // NaN makes the comparison unordered.
      if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      // SETUO is the inverse of SETO; fall through and invert the result.
      NeedInvert = true;
      LLVM_FALLTHROUGH;
    case ISD::SETO:
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      // SETO == (LHS SETOEQ LHS) AND (RHS SETOEQ RHS).
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!TLI.isCondCodeLegal(CC2, OpVT) &&
          (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
           TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      LLVM_FALLTHROUGH;
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        // Keep the relation (low 3 bits) and set the "ordering don't care"
        // bit to get the integer-style variant of the same comparison.
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      LLVM_FALLTHROUGH;
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
      SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
      SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
    }
    // For strict comparisons, merge the two output chains back into one.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
8906