1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/CodeGen/CallingConvLower.h"
16 #include "llvm/CodeGen/MachineFrameInfo.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineJumpTableInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/SelectionDAG.h"
21 #include "llvm/CodeGen/TargetRegisterInfo.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalVariable.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCExpr.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/KnownBits.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetLoweringObjectFile.h"
33 #include "llvm/Target/TargetMachine.h"
34 #include <cctype>
35 using namespace llvm;
36 
/// NOTE: The TargetMachine owns TLOF. This constructor only forwards the
/// TargetMachine to the base class; all shared lowering state lives there.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
40 
/// Default implementation: this base class knows no target-specific node
/// names, so report none (targets override this to name their custom
/// ISD opcodes for debug dumps).
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
44 
45 bool TargetLowering::isPositionIndependent() const {
46   return getTargetMachine().isPositionIndependent();
47 }
48 
49 /// Check whether a given call node is in tail position within its function. If
50 /// so, it sets Chain to the input chain of the tail call.
51 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
52                                           SDValue &Chain) const {
53   const Function &F = DAG.getMachineFunction().getFunction();
54 
55   // First, check if tail calls have been disabled in this function.
56   if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
57     return false;
58 
59   // Conservatively require the attributes of the call to match those of
60   // the return. Ignore NoAlias and NonNull because they don't affect the
61   // call sequence.
62   AttributeList CallerAttrs = F.getAttributes();
63   if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
64           .removeAttribute(Attribute::NoAlias)
65           .removeAttribute(Attribute::NonNull)
66           .hasAttributes())
67     return false;
68 
69   // It's not safe to eliminate the sign / zero extension of the return value.
70   if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
71       CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
72     return false;
73 
74   // Check if the only use is a function return node.
75   return isUsedByReturnOnly(Node, Chain);
76 }
77 
78 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
79     const uint32_t *CallerPreservedMask,
80     const SmallVectorImpl<CCValAssign> &ArgLocs,
81     const SmallVectorImpl<SDValue> &OutVals) const {
82   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
83     const CCValAssign &ArgLoc = ArgLocs[I];
84     if (!ArgLoc.isRegLoc())
85       continue;
86     MCRegister Reg = ArgLoc.getLocReg();
87     // Only look at callee saved registers.
88     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
89       continue;
90     // Check that we pass the value used for the caller.
91     // (We look for a CopyFromReg reading a virtual register that is used
92     //  for the function live-in value of register Reg)
93     SDValue Value = OutVals[I];
94     if (Value->getOpcode() != ISD::CopyFromReg)
95       return false;
96     MCRegister ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
97     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
98       return false;
99   }
100   return true;
101 }
102 
103 /// Set CallLoweringInfo attribute flags based on a call instruction
104 /// and called function attributes.
105 void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
106                                                      unsigned ArgIdx) {
107   IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
108   IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
109   IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
110   IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
111   IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
112   IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
113   IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
114   IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
115   IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
116   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
117   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
118   Alignment = Call->getParamAlign(ArgIdx);
119   ByValType = nullptr;
120   if (IsByVal)
121     ByValType = Call->getParamByValType(ArgIdx);
122   PreallocatedType = nullptr;
123   if (IsPreallocated)
124     PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
125 }
126 
127 /// Generate a libcall taking the given operands as arguments and returning a
128 /// result of type RetVT.
129 std::pair<SDValue, SDValue>
130 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
131                             ArrayRef<SDValue> Ops,
132                             MakeLibCallOptions CallOptions,
133                             const SDLoc &dl,
134                             SDValue InChain) const {
135   if (!InChain)
136     InChain = DAG.getEntryNode();
137 
138   TargetLowering::ArgListTy Args;
139   Args.reserve(Ops.size());
140 
141   TargetLowering::ArgListEntry Entry;
142   for (unsigned i = 0; i < Ops.size(); ++i) {
143     SDValue NewOp = Ops[i];
144     Entry.Node = NewOp;
145     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
146     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
147                                                  CallOptions.IsSExt);
148     Entry.IsZExt = !Entry.IsSExt;
149 
150     if (CallOptions.IsSoften &&
151         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
152       Entry.IsSExt = Entry.IsZExt = false;
153     }
154     Args.push_back(Entry);
155   }
156 
157   if (LC == RTLIB::UNKNOWN_LIBCALL)
158     report_fatal_error("Unsupported library call operation!");
159   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
160                                          getPointerTy(DAG.getDataLayout()));
161 
162   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
163   TargetLowering::CallLoweringInfo CLI(DAG);
164   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
165   bool zeroExtend = !signExtend;
166 
167   if (CallOptions.IsSoften &&
168       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
169     signExtend = zeroExtend = false;
170   }
171 
172   CLI.setDebugLoc(dl)
173       .setChain(InChain)
174       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
175       .setNoReturn(CallOptions.DoesNotReturn)
176       .setDiscardResult(!CallOptions.IsReturnValueUsed)
177       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
178       .setSExtResult(signExtend)
179       .setZExtResult(zeroExtend);
180   return LowerCallTo(CLI);
181 }
182 
/// Break a memory operation of Op.size() bytes into a sequence of value types
/// (appended to MemOps) that together cover it, using at most Limit entries.
/// Returns false if no acceptable lowering exists within the limit.
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // A memcpy with a fixed destination alignment is rejected when the source
  // is less aligned than the destination.
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type for the whole operation first;
  // MVT::Other means "no preference".
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    // Walk down from i64 (relies on MVT integer types being consecutive).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (
          Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
          !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign().value()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily emit the widest usable type, shrinking VT whenever the remaining
  // size is smaller than the current type.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        // Fall back from a vector/FP type to a plain integer of <= 64 bits.
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Step down through the integer types until one is safe to use;
        // i8 is the unconditional floor.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      // NOTE(review): Fast is only read when allowsMisalignedMemoryAccesses
      // returns true; this assumes targets set *Fast in that case — confirm.
      bool Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        // Keep the wide type and let the final access overlap the previous one.
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
272 
273 /// Soften the operands of a comparison. This code is shared among BR_CC,
274 /// SELECT_CC, and SETCC handlers.
275 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
276                                          SDValue &NewLHS, SDValue &NewRHS,
277                                          ISD::CondCode &CCCode,
278                                          const SDLoc &dl, const SDValue OldLHS,
279                                          const SDValue OldRHS) const {
280   SDValue Chain;
281   return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
282                              OldRHS, Chain);
283 }
284 
285 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
286                                          SDValue &NewLHS, SDValue &NewRHS,
287                                          ISD::CondCode &CCCode,
288                                          const SDLoc &dl, const SDValue OldLHS,
289                                          const SDValue OldRHS,
290                                          SDValue &Chain,
291                                          bool IsSignaling) const {
292   // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
293   // not supporting it. We can update this code when libgcc provides such
294   // functions.
295 
296   assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
297          && "Unsupported setcc type!");
298 
299   // Expand into one or more soft-fp libcall(s).
300   RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
301   bool ShouldInvertCC = false;
302   switch (CCCode) {
303   case ISD::SETEQ:
304   case ISD::SETOEQ:
305     LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
306           (VT == MVT::f64) ? RTLIB::OEQ_F64 :
307           (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
308     break;
309   case ISD::SETNE:
310   case ISD::SETUNE:
311     LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
312           (VT == MVT::f64) ? RTLIB::UNE_F64 :
313           (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
314     break;
315   case ISD::SETGE:
316   case ISD::SETOGE:
317     LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
318           (VT == MVT::f64) ? RTLIB::OGE_F64 :
319           (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
320     break;
321   case ISD::SETLT:
322   case ISD::SETOLT:
323     LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
324           (VT == MVT::f64) ? RTLIB::OLT_F64 :
325           (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
326     break;
327   case ISD::SETLE:
328   case ISD::SETOLE:
329     LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
330           (VT == MVT::f64) ? RTLIB::OLE_F64 :
331           (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
332     break;
333   case ISD::SETGT:
334   case ISD::SETOGT:
335     LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
336           (VT == MVT::f64) ? RTLIB::OGT_F64 :
337           (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
338     break;
339   case ISD::SETO:
340     ShouldInvertCC = true;
341     LLVM_FALLTHROUGH;
342   case ISD::SETUO:
343     LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
344           (VT == MVT::f64) ? RTLIB::UO_F64 :
345           (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
346     break;
347   case ISD::SETONE:
348     // SETONE = O && UNE
349     ShouldInvertCC = true;
350     LLVM_FALLTHROUGH;
351   case ISD::SETUEQ:
352     LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
353           (VT == MVT::f64) ? RTLIB::UO_F64 :
354           (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
355     LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
356           (VT == MVT::f64) ? RTLIB::OEQ_F64 :
357           (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
358     break;
359   default:
360     // Invert CC for unordered comparisons
361     ShouldInvertCC = true;
362     switch (CCCode) {
363     case ISD::SETULT:
364       LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
365             (VT == MVT::f64) ? RTLIB::OGE_F64 :
366             (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
367       break;
368     case ISD::SETULE:
369       LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
370             (VT == MVT::f64) ? RTLIB::OGT_F64 :
371             (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
372       break;
373     case ISD::SETUGT:
374       LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
375             (VT == MVT::f64) ? RTLIB::OLE_F64 :
376             (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
377       break;
378     case ISD::SETUGE:
379       LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
380             (VT == MVT::f64) ? RTLIB::OLT_F64 :
381             (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
382       break;
383     default: llvm_unreachable("Do not know how to soften this setcc!");
384     }
385   }
386 
387   // Use the target specific return value for comparions lib calls.
388   EVT RetVT = getCmpLibcallReturnType();
389   SDValue Ops[2] = {NewLHS, NewRHS};
390   TargetLowering::MakeLibCallOptions CallOptions;
391   EVT OpsVT[2] = { OldLHS.getValueType(),
392                    OldRHS.getValueType() };
393   CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
394   auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
395   NewLHS = Call.first;
396   NewRHS = DAG.getConstant(0, dl, RetVT);
397 
398   CCCode = getCmpLibcallCC(LC1);
399   if (ShouldInvertCC) {
400     assert(RetVT.isInteger());
401     CCCode = getSetCCInverse(CCCode, RetVT);
402   }
403 
404   if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
405     // Update Chain.
406     Chain = Call.second;
407   } else {
408     EVT SetCCVT =
409         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
410     SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
411     auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
412     CCCode = getCmpLibcallCC(LC2);
413     if (ShouldInvertCC)
414       CCCode = getSetCCInverse(CCCode, RetVT);
415     NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
416     if (Chain)
417       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
418                           Call2.second);
419     NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
420                          Tmp.getValueType(), Tmp, NewLHS);
421     NewRHS = SDValue();
422   }
423 }
424 
425 /// Return the entry encoding for a jump table in the current function. The
426 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
427 unsigned TargetLowering::getJumpTableEncoding() const {
428   // In non-pic modes, just use the address of a block.
429   if (!isPositionIndependent())
430     return MachineJumpTableInfo::EK_BlockAddress;
431 
432   // In PIC mode, if the target supports a GPRel32 directive, use it.
433   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
434     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
435 
436   // Otherwise, use a label difference.
437   return MachineJumpTableInfo::EK_LabelDifference32;
438 }
439 
440 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
441                                                  SelectionDAG &DAG) const {
442   // If our PIC model is GP relative, use the global offset table as the base.
443   unsigned JTEncoding = getJumpTableEncoding();
444 
445   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
446       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
447     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
448 
449   return Table;
450 }
451 
452 /// This returns the relocation base for the given PIC jumptable, the same as
453 /// getPICJumpTableRelocBase, but as an MCExpr.
454 const MCExpr *
455 TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
456                                              unsigned JTI,MCContext &Ctx) const{
457   // The normal PIC reloc base is the label at the start of the jump table.
458   return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
459 }
460 
461 bool
462 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
463   const TargetMachine &TM = getTargetMachine();
464   const GlobalValue *GV = GA->getGlobal();
465 
466   // If the address is not even local to this DSO we will have to load it from
467   // a got and then add the offset.
468   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
469     return false;
470 
471   // If the code is position independent we will have to add a base register.
472   if (isPositionIndependent())
473     return false;
474 
475   // Otherwise we can do it.
476   return true;
477 }
478 
479 //===----------------------------------------------------------------------===//
480 //  Optimization Methods
481 //===----------------------------------------------------------------------===//
482 
483 /// If the specified instruction has a constant integer operand and there are
484 /// bits set in that constant that are not demanded, then clear those bits and
485 /// return true.
486 bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
487                                             TargetLoweringOpt &TLO) const {
488   SDLoc DL(Op);
489   unsigned Opcode = Op.getOpcode();
490 
491   // Do target-specific constant optimization.
492   if (targetShrinkDemandedConstant(Op, Demanded, TLO))
493     return TLO.New.getNode();
494 
495   // FIXME: ISD::SELECT, ISD::SELECT_CC
496   switch (Opcode) {
497   default:
498     break;
499   case ISD::XOR:
500   case ISD::AND:
501   case ISD::OR: {
502     auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
503     if (!Op1C)
504       return false;
505 
506     // If this is a 'not' op, don't touch it because that's a canonical form.
507     const APInt &C = Op1C->getAPIntValue();
508     if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
509       return false;
510 
511     if (!C.isSubsetOf(Demanded)) {
512       EVT VT = Op.getValueType();
513       SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
514       SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
515       return TLO.CombineTo(Op, NewOp);
516     }
517 
518     break;
519   }
520   }
521 
522   return false;
523 }
524 
525 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
526 /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
527 /// generalized for targets with other types of implicit widening casts.
528 bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
529                                       const APInt &Demanded,
530                                       TargetLoweringOpt &TLO) const {
531   assert(Op.getNumOperands() == 2 &&
532          "ShrinkDemandedOp only supports binary operators!");
533   assert(Op.getNode()->getNumValues() == 1 &&
534          "ShrinkDemandedOp only supports nodes with one result!");
535 
536   SelectionDAG &DAG = TLO.DAG;
537   SDLoc dl(Op);
538 
539   // Early return, as this function cannot handle vector types.
540   if (Op.getValueType().isVector())
541     return false;
542 
543   // Don't do this if the node has another user, which may require the
544   // full value.
545   if (!Op.getNode()->hasOneUse())
546     return false;
547 
548   // Search for the smallest integer type with free casts to and from
549   // Op's type. For expedience, just check power-of-2 integer types.
550   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
551   unsigned DemandedSize = Demanded.getActiveBits();
552   unsigned SmallVTBits = DemandedSize;
553   if (!isPowerOf2_32(SmallVTBits))
554     SmallVTBits = NextPowerOf2(SmallVTBits);
555   for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
556     EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
557     if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
558         TLI.isZExtFree(SmallVT, Op.getValueType())) {
559       // We found a type with free casts.
560       SDValue X = DAG.getNode(
561           Op.getOpcode(), dl, SmallVT,
562           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
563           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
564       assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
565       SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
566       return TLO.CombineTo(Op, Z);
567     }
568   }
569   return false;
570 }
571 
572 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
573                                           DAGCombinerInfo &DCI) const {
574   SelectionDAG &DAG = DCI.DAG;
575   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
576                         !DCI.isBeforeLegalizeOps());
577   KnownBits Known;
578 
579   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
580   if (Simplified) {
581     DCI.AddToWorklist(Op.getNode());
582     DCI.CommitTargetLoweringOpt(TLO);
583   }
584   return Simplified;
585 }
586 
587 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
588                                           KnownBits &Known,
589                                           TargetLoweringOpt &TLO,
590                                           unsigned Depth,
591                                           bool AssumeSingleUse) const {
592   EVT VT = Op.getValueType();
593 
594   // TODO: We can probably do more work on calculating the known bits and
595   // simplifying the operations for scalable vectors, but for now we just
596   // bail out.
597   if (VT.isScalableVector()) {
598     // Pretend we don't know anything for now.
599     Known = KnownBits(DemandedBits.getBitWidth());
600     return false;
601   }
602 
603   APInt DemandedElts = VT.isVector()
604                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
605                            : APInt(1, 1);
606   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
607                               AssumeSingleUse);
608 }
609 
610 // TODO: Can we merge SelectionDAG::GetDemandedBits into this?
611 // TODO: Under what circumstances can we create nodes? Constant folding?
612 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
613     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
614     SelectionDAG &DAG, unsigned Depth) const {
615   // Limit search depth.
616   if (Depth >= SelectionDAG::MaxRecursionDepth)
617     return SDValue();
618 
619   // Ignore UNDEFs.
620   if (Op.isUndef())
621     return SDValue();
622 
623   // Not demanding any bits/elts from Op.
624   if (DemandedBits == 0 || DemandedElts == 0)
625     return DAG.getUNDEF(Op.getValueType());
626 
627   unsigned NumElts = DemandedElts.getBitWidth();
628   unsigned BitWidth = DemandedBits.getBitWidth();
629   KnownBits LHSKnown, RHSKnown;
630   switch (Op.getOpcode()) {
631   case ISD::BITCAST: {
632     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
633     EVT SrcVT = Src.getValueType();
634     EVT DstVT = Op.getValueType();
635     if (SrcVT == DstVT)
636       return Src;
637 
638     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
639     unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
640     if (NumSrcEltBits == NumDstEltBits)
641       if (SDValue V = SimplifyMultipleUseDemandedBits(
642               Src, DemandedBits, DemandedElts, DAG, Depth + 1))
643         return DAG.getBitcast(DstVT, V);
644 
645     // TODO - bigendian once we have test coverage.
646     if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
647         DAG.getDataLayout().isLittleEndian()) {
648       unsigned Scale = NumDstEltBits / NumSrcEltBits;
649       unsigned NumSrcElts = SrcVT.getVectorNumElements();
650       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
651       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
652       for (unsigned i = 0; i != Scale; ++i) {
653         unsigned Offset = i * NumSrcEltBits;
654         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
655         if (!Sub.isNullValue()) {
656           DemandedSrcBits |= Sub;
657           for (unsigned j = 0; j != NumElts; ++j)
658             if (DemandedElts[j])
659               DemandedSrcElts.setBit((j * Scale) + i);
660         }
661       }
662 
663       if (SDValue V = SimplifyMultipleUseDemandedBits(
664               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
665         return DAG.getBitcast(DstVT, V);
666     }
667 
668     // TODO - bigendian once we have test coverage.
669     if ((NumSrcEltBits % NumDstEltBits) == 0 &&
670         DAG.getDataLayout().isLittleEndian()) {
671       unsigned Scale = NumSrcEltBits / NumDstEltBits;
672       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
673       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
674       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
675       for (unsigned i = 0; i != NumElts; ++i)
676         if (DemandedElts[i]) {
677           unsigned Offset = (i % Scale) * NumDstEltBits;
678           DemandedSrcBits.insertBits(DemandedBits, Offset);
679           DemandedSrcElts.setBit(i / Scale);
680         }
681 
682       if (SDValue V = SimplifyMultipleUseDemandedBits(
683               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
684         return DAG.getBitcast(DstVT, V);
685     }
686 
687     break;
688   }
689   case ISD::AND: {
690     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
691     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
692 
693     // If all of the demanded bits are known 1 on one side, return the other.
694     // These bits cannot contribute to the result of the 'and' in this
695     // context.
696     if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
697       return Op.getOperand(0);
698     if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
699       return Op.getOperand(1);
700     break;
701   }
702   case ISD::OR: {
703     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
704     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
705 
706     // If all of the demanded bits are known zero on one side, return the
707     // other.  These bits cannot contribute to the result of the 'or' in this
708     // context.
709     if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
710       return Op.getOperand(0);
711     if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
712       return Op.getOperand(1);
713     break;
714   }
715   case ISD::XOR: {
716     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
717     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
718 
719     // If all of the demanded bits are known zero on one side, return the
720     // other.
721     if (DemandedBits.isSubsetOf(RHSKnown.Zero))
722       return Op.getOperand(0);
723     if (DemandedBits.isSubsetOf(LHSKnown.Zero))
724       return Op.getOperand(1);
725     break;
726   }
727   case ISD::SHL: {
728     // If we are only demanding sign bits then we can use the shift source
729     // directly.
730     if (const APInt *MaxSA =
731             DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
732       SDValue Op0 = Op.getOperand(0);
733       unsigned ShAmt = MaxSA->getZExtValue();
734       unsigned NumSignBits =
735           DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
736       unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
737       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
738         return Op0;
739     }
740     break;
741   }
742   case ISD::SETCC: {
743     SDValue Op0 = Op.getOperand(0);
744     SDValue Op1 = Op.getOperand(1);
745     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
746     // If (1) we only need the sign-bit, (2) the setcc operands are the same
747     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
748     // -1, we may be able to bypass the setcc.
749     if (DemandedBits.isSignMask() &&
750         Op0.getScalarValueSizeInBits() == BitWidth &&
751         getBooleanContents(Op0.getValueType()) ==
752             BooleanContent::ZeroOrNegativeOneBooleanContent) {
753       // If we're testing X < 0, then this compare isn't needed - just use X!
754       // FIXME: We're limiting to integer types here, but this should also work
755       // if we don't care about FP signed-zero. The use of SETLT with FP means
756       // that we don't care about NaNs.
757       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
758           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
759         return Op0;
760     }
761     break;
762   }
763   case ISD::SIGN_EXTEND_INREG: {
764     // If none of the extended bits are demanded, eliminate the sextinreg.
765     SDValue Op0 = Op.getOperand(0);
766     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
767     unsigned ExBits = ExVT.getScalarSizeInBits();
768     if (DemandedBits.getActiveBits() <= ExBits)
769       return Op0;
770     // If the input is already sign extended, just drop the extension.
771     unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
772     if (NumSignBits >= (BitWidth - ExBits + 1))
773       return Op0;
774     break;
775   }
776   case ISD::ANY_EXTEND_VECTOR_INREG:
777   case ISD::SIGN_EXTEND_VECTOR_INREG:
778   case ISD::ZERO_EXTEND_VECTOR_INREG: {
779     // If we only want the lowest element and none of extended bits, then we can
780     // return the bitcasted source vector.
781     SDValue Src = Op.getOperand(0);
782     EVT SrcVT = Src.getValueType();
783     EVT DstVT = Op.getValueType();
784     if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
785         DAG.getDataLayout().isLittleEndian() &&
786         DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
787       return DAG.getBitcast(DstVT, Src);
788     }
789     break;
790   }
791   case ISD::INSERT_VECTOR_ELT: {
792     // If we don't demand the inserted element, return the base vector.
793     SDValue Vec = Op.getOperand(0);
794     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
795     EVT VecVT = Vec.getValueType();
796     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
797         !DemandedElts[CIdx->getZExtValue()])
798       return Vec;
799     break;
800   }
801   case ISD::INSERT_SUBVECTOR: {
802     // If we don't demand the inserted subvector, return the base vector.
803     SDValue Vec = Op.getOperand(0);
804     SDValue Sub = Op.getOperand(1);
805     uint64_t Idx = Op.getConstantOperandVal(2);
806     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
807     if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
808       return Vec;
809     break;
810   }
811   case ISD::VECTOR_SHUFFLE: {
812     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
813 
814     // If all the demanded elts are from one operand and are inline,
815     // then we can use the operand directly.
816     bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
817     for (unsigned i = 0; i != NumElts; ++i) {
818       int M = ShuffleMask[i];
819       if (M < 0 || !DemandedElts[i])
820         continue;
821       AllUndef = false;
822       IdentityLHS &= (M == (int)i);
823       IdentityRHS &= ((M - NumElts) == i);
824     }
825 
826     if (AllUndef)
827       return DAG.getUNDEF(Op.getValueType());
828     if (IdentityLHS)
829       return Op.getOperand(0);
830     if (IdentityRHS)
831       return Op.getOperand(1);
832     break;
833   }
834   default:
835     if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
836       if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
837               Op, DemandedBits, DemandedElts, DAG, Depth))
838         return V;
839     break;
840   }
841   return SDValue();
842 }
843 
844 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
845     SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
846     unsigned Depth) const {
847   EVT VT = Op.getValueType();
848   APInt DemandedElts = VT.isVector()
849                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
850                            : APInt(1, 1);
851   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
852                                          Depth);
853 }
854 
855 SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
856     SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
857     unsigned Depth) const {
858   APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
859   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
860                                          Depth);
861 }
862 
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in the Old and New members of TLO. Otherwise, analyze
/// the expression and return a mask of Known bits for the expression (used to
/// simplify the caller). The Known bits may only be accurate for those bits in
/// the OriginalDemandedBits and OriginalDemandedElts.
870 bool TargetLowering::SimplifyDemandedBits(
871     SDValue Op, const APInt &OriginalDemandedBits,
872     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
873     unsigned Depth, bool AssumeSingleUse) const {
874   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
875   assert(Op.getScalarValueSizeInBits() == BitWidth &&
876          "Mask size mismatches value type size!");
877 
878   // Don't know anything.
879   Known = KnownBits(BitWidth);
880 
881   // TODO: We can probably do more work on calculating the known bits and
882   // simplifying the operations for scalable vectors, but for now we just
883   // bail out.
884   if (Op.getValueType().isScalableVector())
885     return false;
886 
887   unsigned NumElts = OriginalDemandedElts.getBitWidth();
888   assert((!Op.getValueType().isVector() ||
889           NumElts == Op.getValueType().getVectorNumElements()) &&
890          "Unexpected vector size");
891 
892   APInt DemandedBits = OriginalDemandedBits;
893   APInt DemandedElts = OriginalDemandedElts;
894   SDLoc dl(Op);
895   auto &DL = TLO.DAG.getDataLayout();
896 
897   // Undef operand.
898   if (Op.isUndef())
899     return false;
900 
901   if (Op.getOpcode() == ISD::Constant) {
902     // We know all of the bits for a constant!
903     Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
904     Known.Zero = ~Known.One;
905     return false;
906   }
907 
908   // Other users may use these bits.
909   EVT VT = Op.getValueType();
910   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
911     if (Depth != 0) {
912       // If not at the root, Just compute the Known bits to
913       // simplify things downstream.
914       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
915       return false;
916     }
917     // If this is the root being simplified, allow it to have multiple uses,
918     // just set the DemandedBits/Elts to all bits.
919     DemandedBits = APInt::getAllOnesValue(BitWidth);
920     DemandedElts = APInt::getAllOnesValue(NumElts);
921   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
922     // Not demanding any bits/elts from Op.
923     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
924   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
925     // Limit search depth.
926     return false;
927   }
928 
929   KnownBits Known2;
930   switch (Op.getOpcode()) {
931   case ISD::TargetConstant:
932     llvm_unreachable("Can't simplify this node");
933   case ISD::SCALAR_TO_VECTOR: {
934     if (!DemandedElts[0])
935       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
936 
937     KnownBits SrcKnown;
938     SDValue Src = Op.getOperand(0);
939     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
940     APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
941     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
942       return true;
943 
944     // Upper elements are undef, so only get the knownbits if we just demand
945     // the bottom element.
946     if (DemandedElts == 1)
947       Known = SrcKnown.anyextOrTrunc(BitWidth);
948     break;
949   }
950   case ISD::BUILD_VECTOR:
951     // Collect the known bits that are shared by every demanded element.
952     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
953     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
954     return false; // Don't fall through, will infinitely loop.
955   case ISD::LOAD: {
956     LoadSDNode *LD = cast<LoadSDNode>(Op);
957     if (getTargetConstantFromLoad(LD)) {
958       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
959       return false; // Don't fall through, will infinitely loop.
960     } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
961       // If this is a ZEXTLoad and we are looking at the loaded value.
962       EVT MemVT = LD->getMemoryVT();
963       unsigned MemBits = MemVT.getScalarSizeInBits();
964       Known.Zero.setBitsFrom(MemBits);
965       return false; // Don't fall through, will infinitely loop.
966     }
967     break;
968   }
969   case ISD::INSERT_VECTOR_ELT: {
970     SDValue Vec = Op.getOperand(0);
971     SDValue Scl = Op.getOperand(1);
972     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
973     EVT VecVT = Vec.getValueType();
974 
975     // If index isn't constant, assume we need all vector elements AND the
976     // inserted element.
977     APInt DemandedVecElts(DemandedElts);
978     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
979       unsigned Idx = CIdx->getZExtValue();
980       DemandedVecElts.clearBit(Idx);
981 
982       // Inserted element is not required.
983       if (!DemandedElts[Idx])
984         return TLO.CombineTo(Op, Vec);
985     }
986 
987     KnownBits KnownScl;
988     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
989     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
990     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
991       return true;
992 
993     Known = KnownScl.anyextOrTrunc(BitWidth);
994 
995     KnownBits KnownVec;
996     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
997                              Depth + 1))
998       return true;
999 
1000     if (!!DemandedVecElts) {
1001       Known.One &= KnownVec.One;
1002       Known.Zero &= KnownVec.Zero;
1003     }
1004 
1005     return false;
1006   }
1007   case ISD::INSERT_SUBVECTOR: {
1008     // Demand any elements from the subvector and the remainder from the src its
1009     // inserted into.
1010     SDValue Src = Op.getOperand(0);
1011     SDValue Sub = Op.getOperand(1);
1012     uint64_t Idx = Op.getConstantOperandVal(2);
1013     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1014     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1015     APInt DemandedSrcElts = DemandedElts;
1016     DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
1017 
1018     KnownBits KnownSub, KnownSrc;
1019     if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1020                              Depth + 1))
1021       return true;
1022     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1023                              Depth + 1))
1024       return true;
1025 
1026     Known.Zero.setAllBits();
1027     Known.One.setAllBits();
1028     if (!!DemandedSubElts) {
1029       Known.One &= KnownSub.One;
1030       Known.Zero &= KnownSub.Zero;
1031     }
1032     if (!!DemandedSrcElts) {
1033       Known.One &= KnownSrc.One;
1034       Known.Zero &= KnownSrc.Zero;
1035     }
1036 
1037     // Attempt to avoid multi-use src if we don't need anything from it.
1038     if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
1039         !DemandedSrcElts.isAllOnesValue()) {
1040       SDValue NewSub = SimplifyMultipleUseDemandedBits(
1041           Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1042       SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1043           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1044       if (NewSub || NewSrc) {
1045         NewSub = NewSub ? NewSub : Sub;
1046         NewSrc = NewSrc ? NewSrc : Src;
1047         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1048                                         Op.getOperand(2));
1049         return TLO.CombineTo(Op, NewOp);
1050       }
1051     }
1052     break;
1053   }
1054   case ISD::EXTRACT_SUBVECTOR: {
1055     // Offset the demanded elts by the subvector index.
1056     SDValue Src = Op.getOperand(0);
1057     if (Src.getValueType().isScalableVector())
1058       break;
1059     uint64_t Idx = Op.getConstantOperandVal(1);
1060     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1061     APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
1062 
1063     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1064                              Depth + 1))
1065       return true;
1066 
1067     // Attempt to avoid multi-use src if we don't need anything from it.
1068     if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
1069       SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1070           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1071       if (DemandedSrc) {
1072         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1073                                         Op.getOperand(1));
1074         return TLO.CombineTo(Op, NewOp);
1075       }
1076     }
1077     break;
1078   }
1079   case ISD::CONCAT_VECTORS: {
1080     Known.Zero.setAllBits();
1081     Known.One.setAllBits();
1082     EVT SubVT = Op.getOperand(0).getValueType();
1083     unsigned NumSubVecs = Op.getNumOperands();
1084     unsigned NumSubElts = SubVT.getVectorNumElements();
1085     for (unsigned i = 0; i != NumSubVecs; ++i) {
1086       APInt DemandedSubElts =
1087           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1088       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1089                                Known2, TLO, Depth + 1))
1090         return true;
1091       // Known bits are shared by every demanded subvector element.
1092       if (!!DemandedSubElts) {
1093         Known.One &= Known2.One;
1094         Known.Zero &= Known2.Zero;
1095       }
1096     }
1097     break;
1098   }
1099   case ISD::VECTOR_SHUFFLE: {
1100     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1101 
1102     // Collect demanded elements from shuffle operands..
1103     APInt DemandedLHS(NumElts, 0);
1104     APInt DemandedRHS(NumElts, 0);
1105     for (unsigned i = 0; i != NumElts; ++i) {
1106       if (!DemandedElts[i])
1107         continue;
1108       int M = ShuffleMask[i];
1109       if (M < 0) {
1110         // For UNDEF elements, we don't know anything about the common state of
1111         // the shuffle result.
1112         DemandedLHS.clearAllBits();
1113         DemandedRHS.clearAllBits();
1114         break;
1115       }
1116       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
1117       if (M < (int)NumElts)
1118         DemandedLHS.setBit(M);
1119       else
1120         DemandedRHS.setBit(M - NumElts);
1121     }
1122 
1123     if (!!DemandedLHS || !!DemandedRHS) {
1124       SDValue Op0 = Op.getOperand(0);
1125       SDValue Op1 = Op.getOperand(1);
1126 
1127       Known.Zero.setAllBits();
1128       Known.One.setAllBits();
1129       if (!!DemandedLHS) {
1130         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1131                                  Depth + 1))
1132           return true;
1133         Known.One &= Known2.One;
1134         Known.Zero &= Known2.Zero;
1135       }
1136       if (!!DemandedRHS) {
1137         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1138                                  Depth + 1))
1139           return true;
1140         Known.One &= Known2.One;
1141         Known.Zero &= Known2.Zero;
1142       }
1143 
1144       // Attempt to avoid multi-use ops if we don't need anything from them.
1145       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1146           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1147       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1148           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1149       if (DemandedOp0 || DemandedOp1) {
1150         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1151         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1152         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1153         return TLO.CombineTo(Op, NewOp);
1154       }
1155     }
1156     break;
1157   }
1158   case ISD::AND: {
1159     SDValue Op0 = Op.getOperand(0);
1160     SDValue Op1 = Op.getOperand(1);
1161 
1162     // If the RHS is a constant, check to see if the LHS would be zero without
1163     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1164     // simplify the LHS, here we're using information from the LHS to simplify
1165     // the RHS.
1166     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1167       // Do not increment Depth here; that can cause an infinite loop.
1168       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1169       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1170       if ((LHSKnown.Zero & DemandedBits) ==
1171           (~RHSC->getAPIntValue() & DemandedBits))
1172         return TLO.CombineTo(Op, Op0);
1173 
1174       // If any of the set bits in the RHS are known zero on the LHS, shrink
1175       // the constant.
1176       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
1177         return true;
1178 
1179       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1180       // constant, but if this 'and' is only clearing bits that were just set by
1181       // the xor, then this 'and' can be eliminated by shrinking the mask of
1182       // the xor. For example, for a 32-bit X:
1183       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1184       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1185           LHSKnown.One == ~RHSC->getAPIntValue()) {
1186         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1187         return TLO.CombineTo(Op, Xor);
1188       }
1189     }
1190 
1191     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1192                              Depth + 1))
1193       return true;
1194     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1195     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1196                              Known2, TLO, Depth + 1))
1197       return true;
1198     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1199 
1200     // Attempt to avoid multi-use ops if we don't need anything from them.
1201     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1202       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1203           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1204       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1205           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1206       if (DemandedOp0 || DemandedOp1) {
1207         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1208         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1209         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1210         return TLO.CombineTo(Op, NewOp);
1211       }
1212     }
1213 
1214     // If all of the demanded bits are known one on one side, return the other.
1215     // These bits cannot contribute to the result of the 'and'.
1216     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1217       return TLO.CombineTo(Op, Op0);
1218     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1219       return TLO.CombineTo(Op, Op1);
1220     // If all of the demanded bits in the inputs are known zeros, return zero.
1221     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1222       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1223     // If the RHS is a constant, see if we can simplify it.
1224     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
1225       return true;
1226     // If the operation can be done in a smaller type, do so.
1227     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1228       return true;
1229 
1230     Known &= Known2;
1231     break;
1232   }
1233   case ISD::OR: {
1234     SDValue Op0 = Op.getOperand(0);
1235     SDValue Op1 = Op.getOperand(1);
1236 
1237     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1238                              Depth + 1))
1239       return true;
1240     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1241     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1242                              Known2, TLO, Depth + 1))
1243       return true;
1244     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1245 
1246     // Attempt to avoid multi-use ops if we don't need anything from them.
1247     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1248       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1249           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1250       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1251           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1252       if (DemandedOp0 || DemandedOp1) {
1253         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1254         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1255         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1256         return TLO.CombineTo(Op, NewOp);
1257       }
1258     }
1259 
1260     // If all of the demanded bits are known zero on one side, return the other.
1261     // These bits cannot contribute to the result of the 'or'.
1262     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1263       return TLO.CombineTo(Op, Op0);
1264     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1265       return TLO.CombineTo(Op, Op1);
1266     // If the RHS is a constant, see if we can simplify it.
1267     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1268       return true;
1269     // If the operation can be done in a smaller type, do so.
1270     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1271       return true;
1272 
1273     Known |= Known2;
1274     break;
1275   }
1276   case ISD::XOR: {
1277     SDValue Op0 = Op.getOperand(0);
1278     SDValue Op1 = Op.getOperand(1);
1279 
1280     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1281                              Depth + 1))
1282       return true;
1283     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1284     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1285                              Depth + 1))
1286       return true;
1287     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1288 
1289     // Attempt to avoid multi-use ops if we don't need anything from them.
1290     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1291       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1292           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1293       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1294           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1295       if (DemandedOp0 || DemandedOp1) {
1296         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1297         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1298         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1299         return TLO.CombineTo(Op, NewOp);
1300       }
1301     }
1302 
1303     // If all of the demanded bits are known zero on one side, return the other.
1304     // These bits cannot contribute to the result of the 'xor'.
1305     if (DemandedBits.isSubsetOf(Known.Zero))
1306       return TLO.CombineTo(Op, Op0);
1307     if (DemandedBits.isSubsetOf(Known2.Zero))
1308       return TLO.CombineTo(Op, Op1);
1309     // If the operation can be done in a smaller type, do so.
1310     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1311       return true;
1312 
1313     // If all of the unknown bits are known to be zero on one side or the other
1314     // (but not both) turn this into an *inclusive* or.
1315     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1316     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1317       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1318 
1319     if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
1320       // If one side is a constant, and all of the known set bits on the other
1321       // side are also set in the constant, turn this into an AND, as we know
1322       // the bits will be cleared.
1323       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1324       // NB: it is okay if more bits are known than are requested
1325       if (C->getAPIntValue() == Known2.One) {
1326         SDValue ANDC =
1327             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1328         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1329       }
1330 
1331       // If the RHS is a constant, see if we can change it. Don't alter a -1
1332       // constant because that's a 'not' op, and that is better for combining
1333       // and codegen.
1334       if (!C->isAllOnesValue()) {
1335         if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
1336           // We're flipping all demanded bits. Flip the undemanded bits too.
1337           SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1338           return TLO.CombineTo(Op, New);
1339         }
1340         // If we can't turn this into a 'not', try to shrink the constant.
1341         if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1342           return true;
1343       }
1344     }
1345 
1346     Known ^= Known2;
1347     break;
1348   }
1349   case ISD::SELECT:
1350     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1351                              Depth + 1))
1352       return true;
1353     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1354                              Depth + 1))
1355       return true;
1356     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1357     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1358 
1359     // If the operands are constants, see if we can simplify them.
1360     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1361       return true;
1362 
1363     // Only known if known in both the LHS and RHS.
1364     Known.One &= Known2.One;
1365     Known.Zero &= Known2.Zero;
1366     break;
1367   case ISD::SELECT_CC:
1368     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1369                              Depth + 1))
1370       return true;
1371     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1372                              Depth + 1))
1373       return true;
1374     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1375     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1376 
1377     // If the operands are constants, see if we can simplify them.
1378     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1379       return true;
1380 
1381     // Only known if known in both the LHS and RHS.
1382     Known.One &= Known2.One;
1383     Known.Zero &= Known2.Zero;
1384     break;
1385   case ISD::SETCC: {
1386     SDValue Op0 = Op.getOperand(0);
1387     SDValue Op1 = Op.getOperand(1);
1388     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1389     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1390     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1391     // -1, we may be able to bypass the setcc.
1392     if (DemandedBits.isSignMask() &&
1393         Op0.getScalarValueSizeInBits() == BitWidth &&
1394         getBooleanContents(Op0.getValueType()) ==
1395             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1396       // If we're testing X < 0, then this compare isn't needed - just use X!
1397       // FIXME: We're limiting to integer types here, but this should also work
1398       // if we don't care about FP signed-zero. The use of SETLT with FP means
1399       // that we don't care about NaNs.
1400       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1401           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1402         return TLO.CombineTo(Op, Op0);
1403 
1404       // TODO: Should we check for other forms of sign-bit comparisons?
1405       // Examples: X <= -1, X >= 0
1406     }
1407     if (getBooleanContents(Op0.getValueType()) ==
1408             TargetLowering::ZeroOrOneBooleanContent &&
1409         BitWidth > 1)
1410       Known.Zero.setBitsFrom(1);
1411     break;
1412   }
1413   case ISD::SHL: {
1414     SDValue Op0 = Op.getOperand(0);
1415     SDValue Op1 = Op.getOperand(1);
1416     EVT ShiftVT = Op1.getValueType();
1417 
1418     if (const APInt *SA =
1419             TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1420       unsigned ShAmt = SA->getZExtValue();
1421       if (ShAmt == 0)
1422         return TLO.CombineTo(Op, Op0);
1423 
1424       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1425       // single shift.  We can do this if the bottom bits (which are shifted
1426       // out) are never demanded.
1427       // TODO - support non-uniform vector amounts.
1428       if (Op0.getOpcode() == ISD::SRL) {
1429         if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1430           if (const APInt *SA2 =
1431                   TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1432             unsigned C1 = SA2->getZExtValue();
1433             unsigned Opc = ISD::SHL;
1434             int Diff = ShAmt - C1;
1435             if (Diff < 0) {
1436               Diff = -Diff;
1437               Opc = ISD::SRL;
1438             }
1439             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1440             return TLO.CombineTo(
1441                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1442           }
1443         }
1444       }
1445 
1446       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1447       // are not demanded. This will likely allow the anyext to be folded away.
1448       // TODO - support non-uniform vector amounts.
1449       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1450         SDValue InnerOp = Op0.getOperand(0);
1451         EVT InnerVT = InnerOp.getValueType();
1452         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1453         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1454             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1455           EVT ShTy = getShiftAmountTy(InnerVT, DL);
1456           if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1457             ShTy = InnerVT;
1458           SDValue NarrowShl =
1459               TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1460                               TLO.DAG.getConstant(ShAmt, dl, ShTy));
1461           return TLO.CombineTo(
1462               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1463         }
1464 
1465         // Repeat the SHL optimization above in cases where an extension
1466         // intervenes: (shl (anyext (shr x, c1)), c2) to
1467         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1468         // aren't demanded (as above) and that the shifted upper c1 bits of
1469         // x aren't demanded.
1470         // TODO - support non-uniform vector amounts.
1471         if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
1472             InnerOp.hasOneUse()) {
1473           if (const APInt *SA2 =
1474                   TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1475             unsigned InnerShAmt = SA2->getZExtValue();
1476             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1477                 DemandedBits.getActiveBits() <=
1478                     (InnerBits - InnerShAmt + ShAmt) &&
1479                 DemandedBits.countTrailingZeros() >= ShAmt) {
1480               SDValue NewSA =
1481                   TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1482               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1483                                                InnerOp.getOperand(0));
1484               return TLO.CombineTo(
1485                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1486             }
1487           }
1488         }
1489       }
1490 
1491       APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1492       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1493                                Depth + 1))
1494         return true;
1495       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1496       Known.Zero <<= ShAmt;
1497       Known.One <<= ShAmt;
1498       // low bits known zero.
1499       Known.Zero.setLowBits(ShAmt);
1500 
1501       // Try shrinking the operation as long as the shift amount will still be
1502       // in range.
1503       if ((ShAmt < DemandedBits.getActiveBits()) &&
1504           ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1505         return true;
1506     }
1507 
1508     // If we are only demanding sign bits then we can use the shift source
1509     // directly.
1510     if (const APInt *MaxSA =
1511             TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1512       unsigned ShAmt = MaxSA->getZExtValue();
1513       unsigned NumSignBits =
1514           TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1515       unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1516       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1517         return TLO.CombineTo(Op, Op0);
1518     }
1519     break;
1520   }
  case ISD::SRL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // Only a uniform (splat) constant shift amount is simplified here.
    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      // A zero shift is the identity; forward the source operand.
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift.  We can do this if the top bits (which are shifted out)
      // are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SHL) {
        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            // Replace the pair with one shift by |ShAmt - C1|; the direction
            // flips when the inner shl amount exceeds the outer srl amount.
            unsigned Opc = ISD::SRL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SHL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      // The bits demanded from the source are the demanded result bits moved
      // back up to their pre-shift positions.
      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);
      // High bits known zero.
      Known.Zero.setHighBits(ShAmt);
    }
    break;
  }
  case ISD::SRA: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
    if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
        NumHiDemandedBits)
      return TLO.CombineTo(Op, Op0);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable.  The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedBits.isOneValue())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));

    // The remaining folds require a uniform (splat) constant shift amount.
    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      // A zero shift is the identity; forward the source operand.
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // Demanded source bits are the demanded result bits in their
      // pre-shift positions.
      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      if (DemandedBits.countLeadingZeros() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          DemandedBits.countLeadingZeros() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
      }

      // If exactly one (power-of-2) bit is demanded, extract it with a
      // logical shift instead.
      int Log2 = DemandedBits.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
        SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    SDValue Op2 = Op.getOperand(2);
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);

    // With a constant (splat) funnel amount the demanded bits can be split
    // between the two value operands.
    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
      // The funnel-shift amount is interpreted modulo the bit width.
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);

      // For fshl, 0-shift returns the 1st arg.
      // For fshr, 0-shift returns the 2nd arg.
      if (Amt == 0) {
        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
                                 Known, TLO, Depth + 1))
          return true;
        break;
      }

      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;

      // Shift each operand's known bits into their post-funnel positions and
      // merge them into the result's known bits.
      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.One |= Known2.One;
      Known.Zero |= Known2.Zero;
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
                               Known2, TLO, Depth + 1))
        return true;
    }
    break;
  }
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
    // (All BitWidth bits matching the sign bit means every bit is the same.)
    if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, Op0);

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = Op.getOperand(0);
    // The demanded bits of the source are the demanded result bits, reversed.
    APInt DemandedSrcBits = DemandedBits.reverseBits();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    // Known bits of the result are the source's known bits, reversed back.
    Known.One = Known2.One.reverseBits();
    Known.Zero = Known2.Zero.reverseBits();
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = Op.getOperand(0);
    // The demanded bits of the source are the demanded result bits with the
    // bytes swapped back into source order.
    APInt DemandedSrcBits = DemandedBits.byteSwap();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    // Known bits of the result are the source's known bits, byte-swapped.
    Known.One = Known2.One.byteSwap();
    Known.Zero = Known2.Zero.byteSwap();
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    SDValue Op0 = Op.getOperand(0);
    // ExVT describes the narrow type being extended *from*: bits
    // [0, ExVTBits) of Op0 are sign extended up to the full width.
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
    if (DemandedBits.isSignMask()) {
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be getShiftAmountTy
        // for scalar types after legalization.
        EVT ShiftAmtTy = VT;
        if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
          ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);

        // Move the bit at position ExVTBits-1 (the in-register sign bit) up
        // into the sign-bit position with a SHL.
        SDValue ShiftAmt =
            TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
      }
    }

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (DemandedBits.getActiveBits() <= ExVTBits)
      return TLO.CombineTo(Op, Op0);

    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits.setBit(ExVTBits - 1);

    if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (Known.Zero[ExVTBits - 1])
      return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));

    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
      Known.One.setBitsFrom(ExVTBits);
      Known.Zero &= Mask;
    } else { // Input sign bit unknown
      // Only the low ExVTBits of the known bits can be trusted.
      Known.Zero &= Mask;
      Known.One &= Mask;
    }
    break;
  }
  case ISD::BUILD_PAIR: {
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    // Split the demanded bits between the low half (operand 0) and the high
    // half (operand 1) of the pair.
    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    // Reassemble the halves' known bits into the full-width result.
    Known.Zero = KnownLo.Zero.zext(BitWidth) |
                 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);

    Known.One = KnownLo.One.zext(BitWidth) |
                KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
    break;
  }
  case ISD::ZERO_EXTEND:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits() &&
          TLO.DAG.getDataLayout().isLittleEndian())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    // Recurse on the source with the demanded bits/elts narrowed to its type.
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    // Zero extension makes the new top bits known zero.
    Known = Known.zext(BitWidth);
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::SIGN_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits() &&
          TLO.DAG.getDataLayout().isLittleEndian())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    // Recurse on the source with the demanded bits/elts narrowed to its type.
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InDemandedBits.setBit(InBits - 1);

    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");

    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative()) {
      unsigned Opc =
          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }
    break;
  }
  case ISD::ANY_EXTEND:
  case ISD::ANY_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;

    // If we only need the bottom element then we can just bitcast.
    // TODO: Handle ANY_EXTEND?
    if (IsVecInReg && DemandedElts == 1 &&
        VT.getSizeInBits() == SrcVT.getSizeInBits() &&
        TLO.DAG.getDataLayout().isLittleEndian())
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

    // Recurse on the source with the demanded bits/elts narrowed to its type.
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    // Nothing is known about the any-extended top bits.
    Known = Known.anyext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::TRUNCATE: {
    SDValue Src = Op.getOperand(0);

    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
    APInt TruncMask = DemandedBits.zext(OperandBitWidth);
    if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
      return true;
    Known = Known.trunc(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    if (Src.getNode()->hasOneUse()) {
      switch (Src.getOpcode()) {
      default:
        break;
      case ISD::SRL:
        // Shrink SRL by a constant if none of the high bits shifted in are
        // demanded.
        if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
          // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
          // undesirable.
          break;

        // Only handle in-range constant shift amounts.
        SDValue ShAmt = Src.getOperand(1);
        auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
        if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
          break;
        uint64_t ShVal = ShAmtC->getZExtValue();

        // HighBits are the truncated-away bits as they appear after the srl,
        // narrowed to the result type.
        APInt HighBits =
            APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
        HighBits.lshrInPlace(ShVal);
        HighBits = HighBits.trunc(BitWidth);

        if (!(HighBits & DemandedBits)) {
          // None of the shifted in bits are needed.  Add a truncate of the
          // shift input, then shift it.
          if (TLO.LegalTypes())
            ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
          SDValue NewTrunc =
              TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt));
        }
        break;
      }
    }

    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
  case ISD::AssertZext: {
    // AssertZext demands all of the high bits, plus any of the low bits
    // demanded by its users.
    EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
                             TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    // The bits above ZVT are asserted to be zero.
    Known.Zero |= ~InMask;
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Src = Op.getOperand(0);
    SDValue Idx = Op.getOperand(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    unsigned EltBitWidth = Src.getScalarValueSizeInBits();

    // Demand the bits from every vector element without a constant index.
    APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
    if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
      if (CIdx->getAPIntValue().ult(NumSrcElts))
        DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());

    // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
    // anything about the extended bits.
    APInt DemandedSrcBits = DemandedBits;
    if (BitWidth > EltBitWidth)
      DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);

    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcBits.isAllOnesValue() ||
        !DemandedSrcElts.isAllOnesValue()) {
      if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // The result's known bits are the extracted element's, any-extended if
    // the result type is wider than the element type.
    Known = Known2;
    if (BitWidth > EltBitWidth)
      Known = Known.anyext(BitWidth);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();

    // If this is an FP->Int bitcast and if the sign bit is the only
    // thing demanded, turn this into a FGETSIGN.
    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
        DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
        SrcVT.isFloatingPoint()) {
      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
          SrcVT != MVT::f128) {
        // Cannot eliminate/lower SHL for f128 yet.
        EVT Ty = OpVTLegal ? VT : MVT::i32;
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place.  We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
        if (!OpVTLegal && OpVTSizeInBits > 32)
          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
        unsigned ShVal = Op.getValueSizeInBits() - 1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
      }
    }

    // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
    // Demand the elt/bit if any of the original elts/bits are demanded.
    // TODO - bigendian once we have test coverage.
    if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
        TLO.DAG.getDataLayout().isLittleEndian()) {
      // Wide result elements: each result element covers Scale source
      // elements, so map each demanded result chunk back to a source elt.
      unsigned Scale = BitWidth / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned Offset = i * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
        if (!Sub.isNullValue()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      APInt KnownSrcUndef, KnownSrcZero;
      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                     KnownSrcZero, TLO, Depth + 1))
        return true;

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    } else if ((NumSrcEltBits % BitWidth) == 0 &&
               TLO.DAG.getDataLayout().isLittleEndian()) {
      // Narrow result elements: Scale result elements fit in each source
      // element, so place each demanded result's bits at its sub-offset.
      unsigned Scale = NumSrcEltBits / BitWidth;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * BitWidth;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SrcVT.isVector()) {
        APInt KnownSrcUndef, KnownSrcZero;
        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                       KnownSrcZero, TLO, Depth + 1))
          return true;
      }

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    }

    // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
    // recursive call where Known may be useful to the caller.
    if (Depth > 0) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    break;
  }
  case ISD::ADD:
  case ISD::MUL:
  case ISD::SUB: {
    // Add, Sub, and Mul don't demand any bits in positions beyond that
    // of the highest bit demanded of them.
    SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
    SDNodeFlags Flags = Op.getNode()->getFlags();
    unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
    if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
                             Depth + 1) ||
        SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
                             Depth + 1) ||
        // See if the operation should be performed at a smaller bit width.
        ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
      if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
        // Disable the nsw and nuw flags. We can no longer guarantee that we
        // won't wrap after simplification.
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
        return TLO.CombineTo(Op, NewOp);
      }
      return true;
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        // Drop nsw/nuw; the simplified operands may wrap differently.
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If we have a constant operand, we may be able to turn it into -1 if we
    // do not demand the high bits. This can make the constant smaller to
    // encode, allow more general folding, or match specialized instruction
    // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
    // is probably not useful (and could be detrimental).
    ConstantSDNode *C = isConstOrConstSplat(Op1);
    APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
    if (C && !C->isAllOnesValue() && !C->isOne() &&
        (C->getAPIntValue() | HighMask).isAllOnesValue()) {
      SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
      // Disable the nsw and nuw flags. We can no longer guarantee that we
      // won't wrap after simplification.
      Flags.setNoSignedWrap(false);
      Flags.setNoUnsignedWrap(false);
      SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
      return TLO.CombineTo(Op, NewOp);
    }

    // Fall through to the default case to compute the known bits.
    LLVM_FALLTHROUGH;
  }
2190   default:
2191     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2192       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2193                                             Known, TLO, Depth))
2194         return true;
2195       break;
2196     }
2197 
2198     // Just use computeKnownBits to compute output bits.
2199     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2200     break;
2201   }
2202 
2203   // If we know the value of all of the demanded bits, return this as a
2204   // constant.
2205   if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2206     // Avoid folding to a constant if any OpaqueConstant is involved.
2207     const SDNode *N = Op.getNode();
2208     for (SDNodeIterator I = SDNodeIterator::begin(N),
2209                         E = SDNodeIterator::end(N);
2210          I != E; ++I) {
2211       SDNode *Op = *I;
2212       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2213         if (C->isOpaque())
2214           return false;
2215     }
2216     // TODO: Handle float bits as well.
2217     if (VT.isInteger())
2218       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2219   }
2220 
2221   return false;
2222 }
2223 
2224 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2225                                                 const APInt &DemandedElts,
2226                                                 APInt &KnownUndef,
2227                                                 APInt &KnownZero,
2228                                                 DAGCombinerInfo &DCI) const {
2229   SelectionDAG &DAG = DCI.DAG;
2230   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2231                         !DCI.isBeforeLegalizeOps());
2232 
2233   bool Simplified =
2234       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2235   if (Simplified) {
2236     DCI.AddToWorklist(Op.getNode());
2237     DCI.CommitTargetLoweringOpt(TLO);
2238   }
2239 
2240   return Simplified;
2241 }
2242 
2243 /// Given a vector binary operation and known undefined elements for each input
2244 /// operand, compute whether each element of the output is undefined.
2245 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2246                                          const APInt &UndefOp0,
2247                                          const APInt &UndefOp1) {
2248   EVT VT = BO.getValueType();
2249   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2250          "Vector binop only");
2251 
2252   EVT EltVT = VT.getVectorElementType();
2253   unsigned NumElts = VT.getVectorNumElements();
2254   assert(UndefOp0.getBitWidth() == NumElts &&
2255          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2256 
2257   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2258                                    const APInt &UndefVals) {
2259     if (UndefVals[Index])
2260       return DAG.getUNDEF(EltVT);
2261 
2262     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2263       // Try hard to make sure that the getNode() call is not creating temporary
2264       // nodes. Ignore opaque integers because they do not constant fold.
2265       SDValue Elt = BV->getOperand(Index);
2266       auto *C = dyn_cast<ConstantSDNode>(Elt);
2267       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2268         return Elt;
2269     }
2270 
2271     return SDValue();
2272   };
2273 
2274   APInt KnownUndef = APInt::getNullValue(NumElts);
2275   for (unsigned i = 0; i != NumElts; ++i) {
2276     // If both inputs for this element are either constant or undef and match
2277     // the element type, compute the constant/undef result for this element of
2278     // the vector.
2279     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2280     // not handle FP constants. The code within getNode() should be refactored
2281     // to avoid the danger of creating a bogus temporary node here.
2282     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2283     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2284     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2285       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2286         KnownUndef.setBit(i);
2287   }
2288   return KnownUndef;
2289 }
2290 
/// Recursive worker: try to simplify \p Op given that only the vector elements
/// in \p OriginalDemandedElts are needed by its users. On success a replacement
/// is queued via \p TLO and true is returned. \p KnownUndef / \p KnownZero
/// report which demanded result elements are known to be undef / zero.
/// \p AssumeSingleUse suppresses the "other users demand everything" widening.
bool TargetLowering::SimplifyDemandedVectorElts(
    SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  APInt DemandedElts = OriginalDemandedElts;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");

  // Start from "nothing known" - each case below fills these in.
  KnownUndef = KnownZero = APInt::getNullValue(NumElts);

  // TODO: For now we assume we know nothing about scalable vectors.
  if (VT.isScalableVector())
    return false;

  assert(VT.getVectorNumElements() == NumElts &&
         "Mask size mismatches value type element count!");

  // Undef operand.
  if (Op.isUndef()) {
    KnownUndef.setAllBits();
    return false;
  }

  // If Op has other users, assume that all elements are needed.
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
    DemandedElts.setAllBits();

  // Not demanding any elements from Op.
  if (DemandedElts == 0) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  // Helper for demanding the specified elements and all the bits of both binary
  // operands.
  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
    SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    if (NewOp0 || NewOp1) {
      SDValue NewOp = TLO.DAG.getNode(
          Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
      return TLO.CombineTo(Op, NewOp);
    }
    return false;
  };

  switch (Opcode) {
  case ISD::SCALAR_TO_VECTOR: {
    // Only element 0 comes from the scalar operand; if it isn't demanded the
    // whole node folds to undef, otherwise all upper elements are undef.
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    }
    KnownUndef.setHighBits(NumElts - 1);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();

    // We only handle vectors here.
    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
    if (!SrcVT.isVector())
      break;

    // Fast handling of 'identity' bitcasts.
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    if (NumSrcElts == NumElts)
      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
                                        KnownZero, TLO, Depth + 1);

    APInt SrcZero, SrcUndef;
    APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);

    // Bitcast from 'large element' src vector to 'small element' vector, we
    // must demand a source element if any DemandedElt maps to it.
    if ((NumElts % NumSrcElts) == 0) {
      unsigned Scale = NumElts / NumSrcElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBit(i / Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // Try calling SimplifyDemandedBits, converting demanded elts to the bits
      // of the large element.
      // TODO - bigendian once we have test coverage.
      if (TLO.DAG.getDataLayout().isLittleEndian()) {
        unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
        APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
        for (unsigned i = 0; i != NumElts; ++i)
          if (DemandedElts[i]) {
            unsigned Ofs = (i % Scale) * EltSizeInBits;
            SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
          }

        KnownBits Known;
        if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
                                 TLO, Depth + 1))
          return true;
      }

      // If the src element is zero/undef then all the output elements will be -
      // only demanded elements are guaranteed to be correct.
      for (unsigned i = 0; i != NumSrcElts; ++i) {
        if (SrcDemandedElts[i]) {
          if (SrcZero[i])
            KnownZero.setBits(i * Scale, (i + 1) * Scale);
          if (SrcUndef[i])
            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
        }
      }
    }

    // Bitcast from 'small element' src vector to 'large element' vector, we
    // demand all smaller source elements covered by the larger demanded element
    // of this vector.
    if ((NumSrcElts % NumElts) == 0) {
      unsigned Scale = NumSrcElts / NumElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // If all the src elements covering an output element are zero/undef, then
      // the output element will be as well, assuming it was demanded.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i]) {
          if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownZero.setBit(i);
          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownUndef.setBit(i);
        }
      }
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    // Check all elements and simplify any unused elements with UNDEF.
    if (!DemandedElts.isAllOnesValue()) {
      // Don't simplify BROADCASTS.
      if (llvm::any_of(Op->op_values(),
                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
        bool Updated = false;
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i] && !Ops[i].isUndef()) {
            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
            KnownUndef.setBit(i);
            Updated = true;
          }
        }
        if (Updated)
          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
      }
    }
    // Classify the remaining operands as known-undef / known-zero.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue SrcOp = Op.getOperand(i);
      if (SrcOp.isUndef()) {
        KnownUndef.setBit(i);
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
                 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
        KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    // Each operand supplies a contiguous run of NumSubElts result elements;
    // recurse with the matching slice of the demanded mask.
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      SDValue SubOp = Op.getOperand(i);
      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      APInt SubUndef, SubZero;
      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
                                     Depth + 1))
        return true;
      KnownUndef.insertBits(SubUndef, i * NumSubElts);
      KnownZero.insertBits(SubZero, i * NumSubElts);
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src its
    // inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);

    APInt SubUndef, SubZero;
    if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
                                   Depth + 1))
      return true;

    // If none of the src operand elements are demanded, replace it with undef.
    if (!DemandedSrcElts && !Src.isUndef())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                                               TLO.DAG.getUNDEF(VT), Sub,
                                               Op.getOperand(2)));

    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;
    KnownUndef.insertBits(SubUndef, Idx);
    KnownZero.insertBits(SubZero, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcElts.isAllOnesValue() ||
        !DemandedSubElts.isAllOnesValue()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
          Sub, DemandedSubElts, TLO.DAG, Depth + 1);
      if (NewSrc || NewSub) {
        NewSrc = NewSrc ? NewSrc : Src;
        NewSub = NewSub ? NewSub : Sub;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        NewSub, Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownUndef = SrcUndef.extractBits(NumElts, Idx);
    KnownZero = SrcZero.extractBits(NumElts, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnesValue()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));

    // For a legal, constant insertion index, if we don't need this insertion
    // then strip it, else remove it from the demanded elts.
    if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
      unsigned Idx = CIdx->getZExtValue();
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);

      APInt DemandedVecElts(DemandedElts);
      DemandedVecElts.clearBit(Idx);
      if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
                                     KnownZero, TLO, Depth + 1))
        return true;

      // The inserted element overrides whatever was known about lane Idx.
      KnownUndef.clearBit(Idx);
      if (Scl.isUndef())
        KnownUndef.setBit(Idx);

      KnownZero.clearBit(Idx);
      if (isNullConstant(Scl) || isNullFPConstant(Scl))
        KnownZero.setBit(Idx);
      break;
    }

    APInt VecUndef, VecZero;
    if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
                                   Depth + 1))
      return true;
    // Without knowing the insertion index we can't set KnownUndef/KnownZero.
    break;
  }
  case ISD::VSELECT: {
    // Try to transform the select condition based on the current demanded
    // elements.
    // TODO: If a condition element is undef, we can choose from one arm of the
    //       select (and if one arm is undef, then we can propagate that to the
    //       result).
    // TODO - add support for constant vselect masks (see IR version of this).
    APInt UnusedUndef, UnusedZero;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
                                   UnusedZero, TLO, Depth + 1))
      return true;

    // See if we can simplify either vselect operand.
    APInt DemandedLHS(DemandedElts);
    APInt DemandedRHS(DemandedElts);
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
      return true;

    // A result element is only known undef/zero if BOTH arms agree.
    KnownUndef = UndefLHS & UndefRHS;
    KnownZero = ZeroLHS & ZeroRHS;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands..
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    // See if we can simplify either shuffle operand.
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
      return true;

    // Simplify mask using undef elements from LHS/RHS.
    bool Updated = false;
    bool IdentityLHS = true, IdentityRHS = true;
    SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
    for (unsigned i = 0; i != NumElts; ++i) {
      int &M = NewMask[i];
      if (M < 0)
        continue;
      if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
          (M >= (int)NumElts && UndefRHS[M - NumElts])) {
        Updated = true;
        M = -1;
      }
      IdentityLHS &= (M < 0) || (M == (int)i);
      IdentityRHS &= (M < 0) || ((M - NumElts) == i);
    }

    // Update legal shuffle masks based on demanded elements if it won't reduce
    // to Identity which can cause premature removal of the shuffle mask.
    if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
      SDValue LegalShuffle =
          buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
                                  NewMask, TLO.DAG);
      if (LegalShuffle)
        return TLO.CombineTo(Op, LegalShuffle);
    }

    // Propagate undef/zero elements from LHS/RHS.
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0) {
        KnownUndef.setBit(i);
      } else if (M < (int)NumElts) {
        if (UndefLHS[M])
          KnownUndef.setBit(i);
        if (ZeroLHS[M])
          KnownZero.setBit(i);
      } else {
        if (UndefRHS[M - NumElts])
          KnownUndef.setBit(i);
        if (ZeroRHS[M - NumElts])
          KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    // The result elements are extended from the low source elements, so only
    // the low NumElts source elements are demanded.
    APInt SrcUndef, SrcZero;
    SDValue Src = Op.getOperand(0);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownZero = SrcZero.zextOrTrunc(NumElts);
    KnownUndef = SrcUndef.zextOrTrunc(NumElts);

    if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
        Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
        DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
      // aext - if we just need the bottom element then we can bitcast.
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
    }

    if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();
    }
    break;
  }

  // TODO: There are more binop opcodes that could be handled here - MIN,
  // MAX, saturated math, etc.
  case ISD::OR:
  case ISD::XOR:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    KnownZero = ZeroLHS & ZeroRHS;
    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    // Shifting a zero element always yields zero regardless of the amount.
    KnownZero = ZeroLHS;
    KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::MUL:
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;

    // If either side has a zero element, then the result element is zero, even
    // if the other is an UNDEF.
    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
    // and then handle 'and' nodes with the rest of the binop opcodes.
    KnownZero |= SrcZero;
    KnownUndef &= SrcUndef;
    KnownUndef &= ~KnownZero;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::TRUNCATE:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
                                   KnownZero, TLO, Depth + 1))
      return true;

    if (Op.getOpcode() == ISD::ZERO_EXTEND) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();
    }
    break;
  default: {
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
      // Target-specific node: let the target try to simplify.
      if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
                                                  KnownZero, TLO, Depth))
        return true;
    } else {
      // No dedicated element handler: fall back to the bit-level simplifier,
      // demanding every bit of each demanded element.
      KnownBits Known;
      APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
      if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
                               TLO, Depth, AssumeSingleUse))
        return true;
    }
    break;
  }
  }
  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");

  // Constant fold all undef cases.
  // TODO: Handle zero cases as well.
  if (DemandedElts.isSubsetOf(KnownUndef))
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

  return false;
}
2853 
2854 /// Determine which of the bits specified in Mask are known to be either zero or
2855 /// one and return them in the Known.
2856 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2857                                                    KnownBits &Known,
2858                                                    const APInt &DemandedElts,
2859                                                    const SelectionDAG &DAG,
2860                                                    unsigned Depth) const {
2861   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2862           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2863           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2864           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2865          "Should use MaskedValueIsZero if you don't know whether Op"
2866          " is a target node!");
2867   Known.resetAll();
2868 }
2869 
2870 void TargetLowering::computeKnownBitsForTargetInstr(
2871     GISelKnownBits &Analysis, Register R, KnownBits &Known,
2872     const APInt &DemandedElts, const MachineRegisterInfo &MRI,
2873     unsigned Depth) const {
2874   Known.resetAll();
2875 }
2876 
2877 void TargetLowering::computeKnownBitsForFrameIndex(
2878   const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
2879   // The low bits are known zero if the pointer is aligned.
2880   Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
2881 }
2882 
/// Determine the known alignment of the value in register \p R produced by a
/// target-specific instruction. The default implementation claims no alignment
/// knowledge (Align(1)); targets override this for their own opcodes.
Align TargetLowering::computeKnownAlignForTargetInstr(
  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  unsigned Depth) const {
  return Align(1);
}
2888 
2889 /// This method can be implemented by targets that want to expose additional
2890 /// information about sign bits to the DAG Combiner.
2891 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2892                                                          const APInt &,
2893                                                          const SelectionDAG &,
2894                                                          unsigned Depth) const {
2895   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2896           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2897           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2898           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2899          "Should use ComputeNumSignBits if you don't know whether Op"
2900          " is a target node!");
2901   return 1;
2902 }
2903 
/// GlobalISel counterpart of ComputeNumSignBitsForTargetNode. The default
/// conservatively reports a single known sign bit for register \p R.
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  return 1;
}
2909 
2910 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
2911     SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
2912     TargetLoweringOpt &TLO, unsigned Depth) const {
2913   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2914           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2915           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2916           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2917          "Should use SimplifyDemandedVectorElts if you don't know whether Op"
2918          " is a target node!");
2919   return false;
2920 }
2921 
2922 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
2923     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2924     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
2925   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2926           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2927           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2928           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2929          "Should use SimplifyDemandedBits if you don't know whether Op"
2930          " is a target node!");
2931   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
2932   return false;
2933 }
2934 
2935 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
2936     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2937     SelectionDAG &DAG, unsigned Depth) const {
2938   assert(
2939       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2940        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2941        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2942        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2943       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
2944       " is a target node!");
2945   return SDValue();
2946 }
2947 
2948 SDValue
2949 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
2950                                         SDValue N1, MutableArrayRef<int> Mask,
2951                                         SelectionDAG &DAG) const {
2952   bool LegalMask = isShuffleMaskLegal(Mask, VT);
2953   if (!LegalMask) {
2954     std::swap(N0, N1);
2955     ShuffleVectorSDNode::commuteMask(Mask);
2956     LegalMask = isShuffleMaskLegal(Mask, VT);
2957   }
2958 
2959   if (!LegalMask)
2960     return SDValue();
2961 
2962   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
2963 }
2964 
// Targets that lower constant-pool references to target-specific load forms
// override this so DAG combines can see through to the underlying Constant.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  // Default: no constant is recognized behind this load.
  return nullptr;
}
2968 
2969 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
2970                                                   const SelectionDAG &DAG,
2971                                                   bool SNaN,
2972                                                   unsigned Depth) const {
2973   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2974           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2975           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2976           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2977          "Should use isKnownNeverNaN if you don't know whether Op"
2978          " is a target node!");
2979   return false;
2980 }
2981 
2982 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
2983 // work with truncating build vectors and vectors with elements of less than
2984 // 8 bits.
2985 bool TargetLowering::isConstTrueVal(const SDNode *N) const {
2986   if (!N)
2987     return false;
2988 
2989   APInt CVal;
2990   if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2991     CVal = CN->getAPIntValue();
2992   } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
2993     auto *CN = BV->getConstantSplatNode();
2994     if (!CN)
2995       return false;
2996 
2997     // If this is a truncating build vector, truncate the splat value.
2998     // Otherwise, we may fail to match the expected values below.
2999     unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
3000     CVal = CN->getAPIntValue();
3001     if (BVEltWidth < CVal.getBitWidth())
3002       CVal = CVal.trunc(BVEltWidth);
3003   } else {
3004     return false;
3005   }
3006 
3007   switch (getBooleanContents(N->getValueType(0))) {
3008   case UndefinedBooleanContent:
3009     return CVal[0];
3010   case ZeroOrOneBooleanContent:
3011     return CVal.isOneValue();
3012   case ZeroOrNegativeOneBooleanContent:
3013     return CVal.isAllOnesValue();
3014   }
3015 
3016   llvm_unreachable("Invalid boolean contents");
3017 }
3018 
3019 bool TargetLowering::isConstFalseVal(const SDNode *N) const {
3020   if (!N)
3021     return false;
3022 
3023   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3024   if (!CN) {
3025     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3026     if (!BV)
3027       return false;
3028 
3029     // Only interested in constant splats, we don't care about undef
3030     // elements in identifying boolean constants and getConstantSplatNode
3031     // returns NULL if all ops are undef;
3032     CN = BV->getConstantSplatNode();
3033     if (!CN)
3034       return false;
3035   }
3036 
3037   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3038     return !CN->getAPIntValue()[0];
3039 
3040   return CN->isNullValue();
3041 }
3042 
3043 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3044                                        bool SExt) const {
3045   if (VT == MVT::i1)
3046     return N->isOne();
3047 
3048   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3049   switch (Cnt) {
3050   case TargetLowering::ZeroOrOneBooleanContent:
3051     // An extended value of 1 is always true, unless its original type is i1,
3052     // in which case it will be sign extended to -1.
3053     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3054   case TargetLowering::UndefinedBooleanContent:
3055   case TargetLowering::ZeroOrNegativeOneBooleanContent:
3056     return N->isAllOnesValue() && SExt;
3057   }
3058   llvm_unreachable("Unexpected enumeration.");
3059 }
3060 
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
///
/// Handles (X & Y) ==/!= Y in any permutation. Returns the replacement setcc,
/// or a null SDValue when the pattern doesn't match or isn't profitable.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  // Canonicalize the 'and' (if any) to the LHS so only one form is matched.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  EVT OpVT = N0.getValueType();
  // Only integer equality/inequality comparisons against an AND are handled.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // Y is whichever AND operand is repeated as the compare RHS (N1); X is the
  // other operand.
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  SelectionDAG &DAG = DCI.DAG;
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    // Only emit the inverted setcc if its condition code is legal (or we are
    // before operation legalization, when it can still be legalized later).
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    auto *YConst = dyn_cast<ConstantSDNode>(Y);
    if (YConst && YConst->isNullValue())
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
3121 
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Canonicalize the condition to strict-compare-against-power-of-two form:
  // 'ule C' and 'ugt C' need C+1 before it can be a power of two.
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    // Negating both constants (and inverting the condition) matches the same
    // truncation check written with negative immediates.
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  // The added constant must be exactly half the setcc constant, i.e.
  // 1 << (KeptBits-1), or this is not a signed truncation check.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into:  ((%x << C) a>> C) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);

  return T2;
}
3219 
// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
//
// Hoisting the constant out of the shift lets it be materialized once as the
// AND mask; whether that is profitable is decided by the
// shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd target hook.
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) &&
         isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Look for '(C l>>/<< Y)'.
  // NOTE: X is captured by reference and read inside the lambda, so X must be
  // set to the "other" AND operand before each call to Match.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    // The rewritten form shifts X the opposite way.
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    // Let the target decide whether the transposed form is worthwhile.
    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
3290 
3291 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3292 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3293 /// handle the commuted versions of these patterns.
3294 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3295                                            ISD::CondCode Cond, const SDLoc &DL,
3296                                            DAGCombinerInfo &DCI) const {
3297   unsigned BOpcode = N0.getOpcode();
3298   assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3299          "Unexpected binop");
3300   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3301 
3302   // (X + Y) == X --> Y == 0
3303   // (X - Y) == X --> Y == 0
3304   // (X ^ Y) == X --> Y == 0
3305   SelectionDAG &DAG = DCI.DAG;
3306   EVT OpVT = N0.getValueType();
3307   SDValue X = N0.getOperand(0);
3308   SDValue Y = N0.getOperand(1);
3309   if (X == N1)
3310     return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3311 
3312   if (Y != N1)
3313     return SDValue();
3314 
3315   // (X + Y) == Y --> X == 0
3316   // (X ^ Y) == Y --> X == 0
3317   if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3318     return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3319 
3320   // The shift would not be valid if the operands are boolean (i1).
3321   if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3322     return SDValue();
3323 
3324   // (X - Y) == Y --> X == Y << 1
3325   EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3326                                  !DCI.isBeforeLegalize());
3327   SDValue One = DAG.getConstant(1, DL, ShiftVT);
3328   SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3329   if (!DCI.isCalledByLegalizer())
3330     DCI.AddToWorklist(YShl1.getNode());
3331   return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3332 }
3333 
3334 /// Try to simplify a setcc built with the specified operands and cc. If it is
3335 /// unable to simplify it, return a null SDValue.
3336 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
3337                                       ISD::CondCode Cond, bool foldBooleans,
3338                                       DAGCombinerInfo &DCI,
3339                                       const SDLoc &dl) const {
3340   SelectionDAG &DAG = DCI.DAG;
3341   const DataLayout &Layout = DAG.getDataLayout();
3342   EVT OpVT = N0.getValueType();
3343 
3344   // Constant fold or commute setcc.
3345   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
3346     return Fold;
3347 
3348   // Ensure that the constant occurs on the RHS and fold constant comparisons.
3349   // TODO: Handle non-splat vector constants. All undef causes trouble.
3350   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
3351   if (isConstOrConstSplat(N0) &&
3352       (DCI.isBeforeLegalizeOps() ||
3353        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
3354     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3355 
3356   // If we have a subtract with the same 2 non-constant operands as this setcc
3357   // -- but in reverse order -- then try to commute the operands of this setcc
3358   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
3359   // instruction on some targets.
3360   if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
3361       (DCI.isBeforeLegalizeOps() ||
3362        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
3363       DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
3364       !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
3365     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3366 
3367   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3368     const APInt &C1 = N1C->getAPIntValue();
3369 
3370     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
3371     // equality comparison, then we're just comparing whether X itself is
3372     // zero.
3373     if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
3374         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
3375         N0.getOperand(1).getOpcode() == ISD::Constant) {
3376       const APInt &ShAmt = N0.getConstantOperandAPInt(1);
3377       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3378           ShAmt == Log2_32(N0.getValueSizeInBits())) {
3379         if ((C1 == 0) == (Cond == ISD::SETEQ)) {
3380           // (srl (ctlz x), 5) == 0  -> X != 0
3381           // (srl (ctlz x), 5) != 1  -> X != 0
3382           Cond = ISD::SETNE;
3383         } else {
3384           // (srl (ctlz x), 5) != 0  -> X == 0
3385           // (srl (ctlz x), 5) == 1  -> X == 0
3386           Cond = ISD::SETEQ;
3387         }
3388         SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
3389         return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
3390                             Zero, Cond);
3391       }
3392     }
3393 
3394     SDValue CTPOP = N0;
3395     // Look through truncs that don't change the value of a ctpop.
3396     if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
3397       CTPOP = N0.getOperand(0);
3398 
3399     if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
3400         (N0 == CTPOP ||
3401          N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
3402       EVT CTVT = CTPOP.getValueType();
3403       SDValue CTOp = CTPOP.getOperand(0);
3404 
3405       // (ctpop x) u< 2 -> (x & x-1) == 0
3406       // (ctpop x) u> 1 -> (x & x-1) != 0
3407       if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
3408         SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3409         SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3410         SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3411         ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
3412         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
3413       }
3414 
3415       // If ctpop is not supported, expand a power-of-2 comparison based on it.
3416       if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
3417           (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3418         // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
3419         // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
3420         SDValue Zero = DAG.getConstant(0, dl, CTVT);
3421         SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3422         assert(CTVT.isInteger());
3423         ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
3424         SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3425         SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3426         SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
3427         SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
3428         unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
3429         return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
3430       }
3431     }
3432 
3433     // (zext x) == C --> x == (trunc C)
3434     // (sext x) == C --> x == (trunc C)
3435     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3436         DCI.isBeforeLegalize() && N0->hasOneUse()) {
3437       unsigned MinBits = N0.getValueSizeInBits();
3438       SDValue PreExt;
3439       bool Signed = false;
3440       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
3441         // ZExt
3442         MinBits = N0->getOperand(0).getValueSizeInBits();
3443         PreExt = N0->getOperand(0);
3444       } else if (N0->getOpcode() == ISD::AND) {
3445         // DAGCombine turns costly ZExts into ANDs
3446         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
3447           if ((C->getAPIntValue()+1).isPowerOf2()) {
3448             MinBits = C->getAPIntValue().countTrailingOnes();
3449             PreExt = N0->getOperand(0);
3450           }
3451       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
3452         // SExt
3453         MinBits = N0->getOperand(0).getValueSizeInBits();
3454         PreExt = N0->getOperand(0);
3455         Signed = true;
3456       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
3457         // ZEXTLOAD / SEXTLOAD
3458         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
3459           MinBits = LN0->getMemoryVT().getSizeInBits();
3460           PreExt = N0;
3461         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
3462           Signed = true;
3463           MinBits = LN0->getMemoryVT().getSizeInBits();
3464           PreExt = N0;
3465         }
3466       }
3467 
3468       // Figure out how many bits we need to preserve this constant.
3469       unsigned ReqdBits = Signed ?
3470         C1.getBitWidth() - C1.getNumSignBits() + 1 :
3471         C1.getActiveBits();
3472 
3473       // Make sure we're not losing bits from the constant.
3474       if (MinBits > 0 &&
3475           MinBits < C1.getBitWidth() &&
3476           MinBits >= ReqdBits) {
3477         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
3478         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
3479           // Will get folded away.
3480           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
3481           if (MinBits == 1 && C1 == 1)
3482             // Invert the condition.
3483             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
3484                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3485           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
3486           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
3487         }
3488 
3489         // If truncating the setcc operands is not desirable, we can still
3490         // simplify the expression in some cases:
3491         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
3492         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
3493         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
3494         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
3495         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
3496         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
3497         SDValue TopSetCC = N0->getOperand(0);
3498         unsigned N0Opc = N0->getOpcode();
3499         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
3500         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
3501             TopSetCC.getOpcode() == ISD::SETCC &&
3502             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
3503             (isConstFalseVal(N1C) ||
3504              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
3505 
3506           bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
3507                          (!N1C->isNullValue() && Cond == ISD::SETNE);
3508 
3509           if (!Inverse)
3510             return TopSetCC;
3511 
3512           ISD::CondCode InvCond = ISD::getSetCCInverse(
3513               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
3514               TopSetCC.getOperand(0).getValueType());
3515           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
3516                                       TopSetCC.getOperand(1),
3517                                       InvCond);
3518         }
3519       }
3520     }
3521 
3522     // If the LHS is '(and load, const)', the RHS is 0, the test is for
3523     // equality or unsigned, and all 1 bits of the const are in the same
3524     // partial word, see if we can shorten the load.
3525     if (DCI.isBeforeLegalize() &&
3526         !ISD::isSignedIntSetCC(Cond) &&
3527         N0.getOpcode() == ISD::AND && C1 == 0 &&
3528         N0.getNode()->hasOneUse() &&
3529         isa<LoadSDNode>(N0.getOperand(0)) &&
3530         N0.getOperand(0).getNode()->hasOneUse() &&
3531         isa<ConstantSDNode>(N0.getOperand(1))) {
3532       LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
3533       APInt bestMask;
3534       unsigned bestWidth = 0, bestOffset = 0;
3535       if (Lod->isSimple() && Lod->isUnindexed()) {
3536         unsigned origWidth = N0.getValueSizeInBits();
3537         unsigned maskWidth = origWidth;
3538         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
3539         // 8 bits, but have to be careful...
3540         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
3541           origWidth = Lod->getMemoryVT().getSizeInBits();
3542         const APInt &Mask = N0.getConstantOperandAPInt(1);
3543         for (unsigned width = origWidth / 2; width>=8; width /= 2) {
3544           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
3545           for (unsigned offset=0; offset<origWidth/width; offset++) {
3546             if (Mask.isSubsetOf(newMask)) {
3547               if (Layout.isLittleEndian())
3548                 bestOffset = (uint64_t)offset * (width/8);
3549               else
3550                 bestOffset = (origWidth/width - offset - 1) * (width/8);
3551               bestMask = Mask.lshr(offset * (width/8) * 8);
3552               bestWidth = width;
3553               break;
3554             }
3555             newMask <<= width;
3556           }
3557         }
3558       }
3559       if (bestWidth) {
3560         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
3561         if (newVT.isRound() &&
3562             shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
3563           SDValue Ptr = Lod->getBasePtr();
3564           if (bestOffset != 0)
3565             Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl);
3566           unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
3567           SDValue NewLoad = DAG.getLoad(
3568               newVT, dl, Lod->getChain(), Ptr,
3569               Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
3570           return DAG.getSetCC(dl, VT,
3571                               DAG.getNode(ISD::AND, dl, newVT, NewLoad,
3572                                       DAG.getConstant(bestMask.trunc(bestWidth),
3573                                                       dl, newVT)),
3574                               DAG.getConstant(0LL, dl, newVT), Cond);
3575         }
3576       }
3577     }
3578 
3579     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
3580     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
3581       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
3582 
3583       // If the comparison constant has bits in the upper part, the
3584       // zero-extended value could never match.
3585       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
3586                                               C1.getBitWidth() - InSize))) {
3587         switch (Cond) {
3588         case ISD::SETUGT:
3589         case ISD::SETUGE:
3590         case ISD::SETEQ:
3591           return DAG.getConstant(0, dl, VT);
3592         case ISD::SETULT:
3593         case ISD::SETULE:
3594         case ISD::SETNE:
3595           return DAG.getConstant(1, dl, VT);
3596         case ISD::SETGT:
3597         case ISD::SETGE:
3598           // True if the sign bit of C1 is set.
3599           return DAG.getConstant(C1.isNegative(), dl, VT);
3600         case ISD::SETLT:
3601         case ISD::SETLE:
3602           // True if the sign bit of C1 isn't set.
3603           return DAG.getConstant(C1.isNonNegative(), dl, VT);
3604         default:
3605           break;
3606         }
3607       }
3608 
3609       // Otherwise, we can perform the comparison with the low bits.
3610       switch (Cond) {
3611       case ISD::SETEQ:
3612       case ISD::SETNE:
3613       case ISD::SETUGT:
3614       case ISD::SETUGE:
3615       case ISD::SETULT:
3616       case ISD::SETULE: {
3617         EVT newVT = N0.getOperand(0).getValueType();
3618         if (DCI.isBeforeLegalizeOps() ||
3619             (isOperationLegal(ISD::SETCC, newVT) &&
3620              isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
3621           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
3622           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
3623 
3624           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
3625                                           NewConst, Cond);
3626           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
3627         }
3628         break;
3629       }
3630       default:
3631         break; // todo, be more careful with signed comparisons
3632       }
3633     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3634                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3635       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
3636       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
3637       EVT ExtDstTy = N0.getValueType();
3638       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
3639 
3640       // If the constant doesn't fit into the number of bits for the source of
3641       // the sign extension, it is impossible for both sides to be equal.
3642       if (C1.getMinSignedBits() > ExtSrcTyBits)
3643         return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
3644 
3645       SDValue ZextOp;
3646       EVT Op0Ty = N0.getOperand(0).getValueType();
3647       if (Op0Ty == ExtSrcTy) {
3648         ZextOp = N0.getOperand(0);
3649       } else {
3650         APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
3651         ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
3652                              DAG.getConstant(Imm, dl, Op0Ty));
3653       }
3654       if (!DCI.isCalledByLegalizer())
3655         DCI.AddToWorklist(ZextOp.getNode());
3656       // Otherwise, make this a use of a zext.
3657       return DAG.getSetCC(dl, VT, ZextOp,
3658                           DAG.getConstant(C1 & APInt::getLowBitsSet(
3659                                                               ExtDstTyBits,
3660                                                               ExtSrcTyBits),
3661                                           dl, ExtDstTy),
3662                           Cond);
3663     } else if ((N1C->isNullValue() || N1C->isOne()) &&
3664                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3665       // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
3666       if (N0.getOpcode() == ISD::SETCC &&
3667           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
3668           (N0.getValueType() == MVT::i1 ||
3669            getBooleanContents(N0.getOperand(0).getValueType()) ==
3670                        ZeroOrOneBooleanContent)) {
3671         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
3672         if (TrueWhenTrue)
3673           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
3674         // Invert the condition.
3675         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
3676         CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
3677         if (DCI.isBeforeLegalizeOps() ||
3678             isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
3679           return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
3680       }
3681 
3682       if ((N0.getOpcode() == ISD::XOR ||
3683            (N0.getOpcode() == ISD::AND &&
3684             N0.getOperand(0).getOpcode() == ISD::XOR &&
3685             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
3686           isa<ConstantSDNode>(N0.getOperand(1)) &&
3687           cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
3688         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
3689         // can only do this if the top bits are known zero.
3690         unsigned BitWidth = N0.getValueSizeInBits();
3691         if (DAG.MaskedValueIsZero(N0,
3692                                   APInt::getHighBitsSet(BitWidth,
3693                                                         BitWidth-1))) {
3694           // Okay, get the un-inverted input value.
3695           SDValue Val;
3696           if (N0.getOpcode() == ISD::XOR) {
3697             Val = N0.getOperand(0);
3698           } else {
3699             assert(N0.getOpcode() == ISD::AND &&
3700                     N0.getOperand(0).getOpcode() == ISD::XOR);
3701             // ((X^1)&1)^1 -> X & 1
3702             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
3703                               N0.getOperand(0).getOperand(0),
3704                               N0.getOperand(1));
3705           }
3706 
3707           return DAG.getSetCC(dl, VT, Val, N1,
3708                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3709         }
3710       } else if (N1C->isOne()) {
3711         SDValue Op0 = N0;
3712         if (Op0.getOpcode() == ISD::TRUNCATE)
3713           Op0 = Op0.getOperand(0);
3714 
3715         if ((Op0.getOpcode() == ISD::XOR) &&
3716             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
3717             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
3718           SDValue XorLHS = Op0.getOperand(0);
3719           SDValue XorRHS = Op0.getOperand(1);
3720           // Ensure that the input setccs return an i1 type or 0/1 value.
3721           if (Op0.getValueType() == MVT::i1 ||
3722               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
3723                       ZeroOrOneBooleanContent &&
3724                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
3725                         ZeroOrOneBooleanContent)) {
3726             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
3727             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
3728             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
3729           }
3730         }
3731         if (Op0.getOpcode() == ISD::AND &&
3732             isa<ConstantSDNode>(Op0.getOperand(1)) &&
3733             cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
3734           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
3735           if (Op0.getValueType().bitsGT(VT))
3736             Op0 = DAG.getNode(ISD::AND, dl, VT,
3737                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
3738                           DAG.getConstant(1, dl, VT));
3739           else if (Op0.getValueType().bitsLT(VT))
3740             Op0 = DAG.getNode(ISD::AND, dl, VT,
3741                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
3742                         DAG.getConstant(1, dl, VT));
3743 
3744           return DAG.getSetCC(dl, VT, Op0,
3745                               DAG.getConstant(0, dl, Op0.getValueType()),
3746                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3747         }
3748         if (Op0.getOpcode() == ISD::AssertZext &&
3749             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
3750           return DAG.getSetCC(dl, VT, Op0,
3751                               DAG.getConstant(0, dl, Op0.getValueType()),
3752                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3753       }
3754     }
3755 
3756     // Given:
3757     //   icmp eq/ne (urem %x, %y), 0
3758     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
3759     //   icmp eq/ne %x, 0
3760     if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
3761         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3762       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
3763       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
3764       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
3765         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
3766     }
3767 
3768     if (SDValue V =
3769             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
3770       return V;
3771   }
3772 
3773   // These simplifications apply to splat vectors as well.
3774   // TODO: Handle more splat vector cases.
3775   if (auto *N1C = isConstOrConstSplat(N1)) {
3776     const APInt &C1 = N1C->getAPIntValue();
3777 
3778     APInt MinVal, MaxVal;
3779     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
3780     if (ISD::isSignedIntSetCC(Cond)) {
3781       MinVal = APInt::getSignedMinValue(OperandBitSize);
3782       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
3783     } else {
3784       MinVal = APInt::getMinValue(OperandBitSize);
3785       MaxVal = APInt::getMaxValue(OperandBitSize);
3786     }
3787 
3788     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
3789     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
3790       // X >= MIN --> true
3791       if (C1 == MinVal)
3792         return DAG.getBoolConstant(true, dl, VT, OpVT);
3793 
3794       if (!VT.isVector()) { // TODO: Support this for vectors.
3795         // X >= C0 --> X > (C0 - 1)
3796         APInt C = C1 - 1;
3797         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
3798         if ((DCI.isBeforeLegalizeOps() ||
3799              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3800             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3801                                   isLegalICmpImmediate(C.getSExtValue())))) {
3802           return DAG.getSetCC(dl, VT, N0,
3803                               DAG.getConstant(C, dl, N1.getValueType()),
3804                               NewCC);
3805         }
3806       }
3807     }
3808 
3809     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
3810       // X <= MAX --> true
3811       if (C1 == MaxVal)
3812         return DAG.getBoolConstant(true, dl, VT, OpVT);
3813 
3814       // X <= C0 --> X < (C0 + 1)
3815       if (!VT.isVector()) { // TODO: Support this for vectors.
3816         APInt C = C1 + 1;
3817         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
3818         if ((DCI.isBeforeLegalizeOps() ||
3819              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3820             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3821                                   isLegalICmpImmediate(C.getSExtValue())))) {
3822           return DAG.getSetCC(dl, VT, N0,
3823                               DAG.getConstant(C, dl, N1.getValueType()),
3824                               NewCC);
3825         }
3826       }
3827     }
3828 
3829     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
3830       if (C1 == MinVal)
3831         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
3832 
3833       // TODO: Support this for vectors after legalize ops.
3834       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3835         // Canonicalize setlt X, Max --> setne X, Max
3836         if (C1 == MaxVal)
3837           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3838 
3839         // If we have setult X, 1, turn it into seteq X, 0
3840         if (C1 == MinVal+1)
3841           return DAG.getSetCC(dl, VT, N0,
3842                               DAG.getConstant(MinVal, dl, N0.getValueType()),
3843                               ISD::SETEQ);
3844       }
3845     }
3846 
3847     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
3848       if (C1 == MaxVal)
3849         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
3850 
3851       // TODO: Support this for vectors after legalize ops.
3852       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3853         // Canonicalize setgt X, Min --> setne X, Min
3854         if (C1 == MinVal)
3855           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3856 
3857         // If we have setugt X, Max-1, turn it into seteq X, Max
3858         if (C1 == MaxVal-1)
3859           return DAG.getSetCC(dl, VT, N0,
3860                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
3861                               ISD::SETEQ);
3862       }
3863     }
3864 
3865     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
3866       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
3867       if (C1.isNullValue())
3868         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
3869                 VT, N0, N1, Cond, DCI, dl))
3870           return CC;
3871     }
3872 
3873     // If we have "setcc X, C0", check to see if we can shrink the immediate
3874     // by changing cc.
3875     // TODO: Support this for vectors after legalize ops.
3876     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3877       // SETUGT X, SINTMAX  -> SETLT X, 0
3878       if (Cond == ISD::SETUGT &&
3879           C1 == APInt::getSignedMaxValue(OperandBitSize))
3880         return DAG.getSetCC(dl, VT, N0,
3881                             DAG.getConstant(0, dl, N1.getValueType()),
3882                             ISD::SETLT);
3883 
3884       // SETULT X, SINTMIN  -> SETGT X, -1
3885       if (Cond == ISD::SETULT &&
3886           C1 == APInt::getSignedMinValue(OperandBitSize)) {
3887         SDValue ConstMinusOne =
3888             DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
3889                             N1.getValueType());
3890         return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
3891       }
3892     }
3893   }
3894 
3895   // Back to non-vector simplifications.
3896   // TODO: Can we do these for vector splats?
3897   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3898     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3899     const APInt &C1 = N1C->getAPIntValue();
3900     EVT ShValTy = N0.getValueType();
3901 
3902     // Fold bit comparisons when we can.
3903     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3904         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
3905         N0.getOpcode() == ISD::AND) {
3906       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3907         EVT ShiftTy =
3908             getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
3909         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
3910           // Perform the xform if the AND RHS is a single bit.
3911           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
3912           if (AndRHS->getAPIntValue().isPowerOf2() &&
3913               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
3914             return DAG.getNode(ISD::TRUNCATE, dl, VT,
3915                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3916                                            DAG.getConstant(ShCt, dl, ShiftTy)));
3917           }
3918         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
3919           // (X & 8) == 8  -->  (X & 8) >> 3
3920           // Perform the xform if C1 is a single bit.
3921           unsigned ShCt = C1.logBase2();
3922           if (C1.isPowerOf2() &&
3923               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
3924             return DAG.getNode(ISD::TRUNCATE, dl, VT,
3925                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3926                                            DAG.getConstant(ShCt, dl, ShiftTy)));
3927           }
3928         }
3929       }
3930     }
3931 
3932     if (C1.getMinSignedBits() <= 64 &&
3933         !isLegalICmpImmediate(C1.getSExtValue())) {
3934       EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
3935       // (X & -256) == 256 -> (X >> 8) == 1
3936       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3937           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
3938         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3939           const APInt &AndRHSC = AndRHS->getAPIntValue();
3940           if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
3941             unsigned ShiftBits = AndRHSC.countTrailingZeros();
3942             if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
3943               SDValue Shift =
3944                 DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
3945                             DAG.getConstant(ShiftBits, dl, ShiftTy));
3946               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
3947               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
3948             }
3949           }
3950         }
3951       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
3952                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
3953         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
3954         // X <  0x100000000 -> (X >> 32) <  1
3955         // X >= 0x100000000 -> (X >> 32) >= 1
3956         // X <= 0x0ffffffff -> (X >> 32) <  1
3957         // X >  0x0ffffffff -> (X >> 32) >= 1
3958         unsigned ShiftBits;
3959         APInt NewC = C1;
3960         ISD::CondCode NewCond = Cond;
3961         if (AdjOne) {
3962           ShiftBits = C1.countTrailingOnes();
3963           NewC = NewC + 1;
3964           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3965         } else {
3966           ShiftBits = C1.countTrailingZeros();
3967         }
3968         NewC.lshrInPlace(ShiftBits);
3969         if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
3970             isLegalICmpImmediate(NewC.getSExtValue()) &&
3971             !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
3972           SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3973                                       DAG.getConstant(ShiftBits, dl, ShiftTy));
3974           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
3975           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
3976         }
3977       }
3978     }
3979   }
3980 
3981   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
3982     auto *CFP = cast<ConstantFPSDNode>(N1);
3983     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
3984 
3985     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
3986     // constant if knowing that the operand is non-nan is enough.  We prefer to
3987     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
3988     // materialize 0.0.
3989     if (Cond == ISD::SETO || Cond == ISD::SETUO)
3990       return DAG.getSetCC(dl, VT, N0, N0, Cond);
3991 
3992     // setcc (fneg x), C -> setcc swap(pred) x, -C
3993     if (N0.getOpcode() == ISD::FNEG) {
3994       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
3995       if (DCI.isBeforeLegalizeOps() ||
3996           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
3997         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
3998         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
3999       }
4000     }
4001 
4002     // If the condition is not legal, see if we can find an equivalent one
4003     // which is legal.
4004     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
4005       // If the comparison was an awkward floating-point == or != and one of
4006       // the comparison operands is infinity or negative infinity, convert the
4007       // condition to a less-awkward <= or >=.
4008       if (CFP->getValueAPF().isInfinity()) {
4009         bool IsNegInf = CFP->getValueAPF().isNegative();
4010         ISD::CondCode NewCond = ISD::SETCC_INVALID;
4011         switch (Cond) {
4012         case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
4013         case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
4014         case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
4015         case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
4016         default: break;
4017         }
4018         if (NewCond != ISD::SETCC_INVALID &&
4019             isCondCodeLegal(NewCond, N0.getSimpleValueType()))
4020           return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4021       }
4022     }
4023   }
4024 
4025   if (N0 == N1) {
4026     // The sext(setcc()) => setcc() optimization relies on the appropriate
4027     // constant being emitted.
4028     assert(!N0.getValueType().isInteger() &&
4029            "Integer types should be handled by FoldSetCC");
4030 
4031     bool EqTrue = ISD::isTrueWhenEqual(Cond);
4032     unsigned UOF = ISD::getUnorderedFlavor(Cond);
4033     if (UOF == 2) // FP operators that are undefined on NaNs.
4034       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4035     if (UOF == unsigned(EqTrue))
4036       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4037     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
4038     // if it is not already.
4039     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
4040     if (NewCond != Cond &&
4041         (DCI.isBeforeLegalizeOps() ||
4042                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
4043       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4044   }
4045 
4046   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4047       N0.getValueType().isInteger()) {
4048     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
4049         N0.getOpcode() == ISD::XOR) {
4050       // Simplify (X+Y) == (X+Z) -->  Y == Z
4051       if (N0.getOpcode() == N1.getOpcode()) {
4052         if (N0.getOperand(0) == N1.getOperand(0))
4053           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
4054         if (N0.getOperand(1) == N1.getOperand(1))
4055           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
4056         if (isCommutativeBinOp(N0.getOpcode())) {
4057           // If X op Y == Y op X, try other combinations.
4058           if (N0.getOperand(0) == N1.getOperand(1))
4059             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
4060                                 Cond);
4061           if (N0.getOperand(1) == N1.getOperand(0))
4062             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
4063                                 Cond);
4064         }
4065       }
4066 
4067       // If RHS is a legal immediate value for a compare instruction, we need
4068       // to be careful about increasing register pressure needlessly.
4069       bool LegalRHSImm = false;
4070 
4071       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
4072         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4073           // Turn (X+C1) == C2 --> X == C2-C1
4074           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
4075             return DAG.getSetCC(dl, VT, N0.getOperand(0),
4076                                 DAG.getConstant(RHSC->getAPIntValue()-
4077                                                 LHSR->getAPIntValue(),
4078                                 dl, N0.getValueType()), Cond);
4079           }
4080 
4081           // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
4082           if (N0.getOpcode() == ISD::XOR)
4083             // If we know that all of the inverted bits are zero, don't bother
4084             // performing the inversion.
4085             if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
4086               return
4087                 DAG.getSetCC(dl, VT, N0.getOperand(0),
4088                              DAG.getConstant(LHSR->getAPIntValue() ^
4089                                                RHSC->getAPIntValue(),
4090                                              dl, N0.getValueType()),
4091                              Cond);
4092         }
4093 
4094         // Turn (C1-X) == C2 --> X == C1-C2
4095         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
4096           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
4097             return
4098               DAG.getSetCC(dl, VT, N0.getOperand(1),
4099                            DAG.getConstant(SUBC->getAPIntValue() -
4100                                              RHSC->getAPIntValue(),
4101                                            dl, N0.getValueType()),
4102                            Cond);
4103           }
4104         }
4105 
4106         // Could RHSC fold directly into a compare?
4107         if (RHSC->getValueType(0).getSizeInBits() <= 64)
4108           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
4109       }
4110 
4111       // (X+Y) == X --> Y == 0 and similar folds.
4112       // Don't do this if X is an immediate that can fold into a cmp
4113       // instruction and X+Y has other uses. It could be an induction variable
4114       // chain, and the transform would increase register pressure.
4115       if (!LegalRHSImm || N0.hasOneUse())
4116         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
4117           return V;
4118     }
4119 
4120     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
4121         N1.getOpcode() == ISD::XOR)
4122       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
4123         return V;
4124 
4125     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
4126       return V;
4127   }
4128 
4129   // Fold remainder of division by a constant.
4130   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
4131       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4132     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4133 
4134     // When division is cheap or optimizing for minimum size,
4135     // fall through to DIVREM creation by skipping this fold.
4136     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
4137       if (N0.getOpcode() == ISD::UREM) {
4138         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
4139           return Folded;
4140       } else if (N0.getOpcode() == ISD::SREM) {
4141         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
4142           return Folded;
4143       }
4144     }
4145   }
4146 
4147   // Fold away ALL boolean setcc's.
4148   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
4149     SDValue Temp;
4150     switch (Cond) {
4151     default: llvm_unreachable("Unknown integer setcc!");
4152     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
4153       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4154       N0 = DAG.getNOT(dl, Temp, OpVT);
4155       if (!DCI.isCalledByLegalizer())
4156         DCI.AddToWorklist(Temp.getNode());
4157       break;
4158     case ISD::SETNE:  // X != Y   -->  (X^Y)
4159       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4160       break;
4161     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
4162     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
4163       Temp = DAG.getNOT(dl, N0, OpVT);
4164       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
4165       if (!DCI.isCalledByLegalizer())
4166         DCI.AddToWorklist(Temp.getNode());
4167       break;
4168     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
4169     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
4170       Temp = DAG.getNOT(dl, N1, OpVT);
4171       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
4172       if (!DCI.isCalledByLegalizer())
4173         DCI.AddToWorklist(Temp.getNode());
4174       break;
4175     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
4176     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
4177       Temp = DAG.getNOT(dl, N0, OpVT);
4178       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
4179       if (!DCI.isCalledByLegalizer())
4180         DCI.AddToWorklist(Temp.getNode());
4181       break;
4182     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
4183     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
4184       Temp = DAG.getNOT(dl, N1, OpVT);
4185       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
4186       break;
4187     }
4188     if (VT.getScalarType() != MVT::i1) {
4189       if (!DCI.isCalledByLegalizer())
4190         DCI.AddToWorklist(N0.getNode());
4191       // FIXME: If running after legalize, we probably can't do this.
4192       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
4193       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
4194     }
4195     return N0;
4196   }
4197 
4198   // Could not fold it.
4199   return SDValue();
4200 }
4201 
4202 /// Returns true (and the GlobalValue and the offset) if the node is a
4203 /// GlobalAddress + offset.
4204 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
4205                                     int64_t &Offset) const {
4206 
4207   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
4208 
4209   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
4210     GA = GASD->getGlobal();
4211     Offset += GASD->getOffset();
4212     return true;
4213   }
4214 
4215   if (N->getOpcode() == ISD::ADD) {
4216     SDValue N1 = N->getOperand(0);
4217     SDValue N2 = N->getOperand(1);
4218     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
4219       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
4220         Offset += V->getSExtValue();
4221         return true;
4222       }
4223     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
4224       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
4225         Offset += V->getSExtValue();
4226         return true;
4227       }
4228     }
4229   }
4230 
4231   return false;
4232 }
4233 
/// Hook for target-specific DAG combines; called by the DAGCombiner for
/// target-opcode nodes. The base implementation performs no combines and
/// returns an empty SDValue, meaning "no change"; targets override this to
/// optimize their own nodes.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
4239 
4240 //===----------------------------------------------------------------------===//
4241 //  Inline Assembler Implementation Methods
4242 //===----------------------------------------------------------------------===//
4243 
4244 TargetLowering::ConstraintType
4245 TargetLowering::getConstraintType(StringRef Constraint) const {
4246   unsigned S = Constraint.size();
4247 
4248   if (S == 1) {
4249     switch (Constraint[0]) {
4250     default: break;
4251     case 'r':
4252       return C_RegisterClass;
4253     case 'm': // memory
4254     case 'o': // offsetable
4255     case 'V': // not offsetable
4256       return C_Memory;
4257     case 'n': // Simple Integer
4258     case 'E': // Floating Point Constant
4259     case 'F': // Floating Point Constant
4260       return C_Immediate;
4261     case 'i': // Simple Integer or Relocatable Constant
4262     case 's': // Relocatable Constant
4263     case 'p': // Address.
4264     case 'X': // Allow ANY value.
4265     case 'I': // Target registers.
4266     case 'J':
4267     case 'K':
4268     case 'L':
4269     case 'M':
4270     case 'N':
4271     case 'O':
4272     case 'P':
4273     case '<':
4274     case '>':
4275       return C_Other;
4276     }
4277   }
4278 
4279   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
4280     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
4281       return C_Memory;
4282     return C_Register;
4283   }
4284   return C_Unknown;
4285 }
4286 
4287 /// Try to replace an X constraint, which matches anything, with another that
4288 /// has more specific requirements based on the type of the corresponding
4289 /// operand.
4290 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4291   if (ConstraintVT.isInteger())
4292     return "r";
4293   if (ConstraintVT.isFloatingPoint())
4294     return "f"; // works for many targets
4295   return nullptr;
4296 }
4297 
/// Hook for lowering an inline-asm output operand for a target-specific
/// constraint. The base implementation handles nothing and returns an empty
/// SDValue so the generic inline-asm lowering takes over; targets override
/// this to glue custom output constraints into Chain/Flag.
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
    SelectionDAG &DAG) const {
  return SDValue();
}
4303 
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
///
/// Handles the target-independent single-letter constraints 'X', 'i', 'n'
/// and 's'; targets override this to add their own letters. On failure Ops
/// is left untouched, which is how callers detect an invalid operand.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled by this default implementation.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':     // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock ||
        Op.getOpcode() == ISD::TargetBlockAddress) {
      Ops.push_back(Op);
      return;
    }
    LLVM_FALLTHROUGH;
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    GlobalAddressSDNode *GA;
    ConstantSDNode *C;
    BlockAddressSDNode *BA;
    uint64_t Offset = 0;   // Constant displacement accumulated from ADD/SUB.

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (1) {
      // 'i'/'s' (and 'X' falling through) accept a global address, folding in
      // any offset accumulated so far; 'n' requires a plain integer.
      if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0),
                                                 Offset + GA->getOffset()));
        return;
      } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
                 ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        // i1 ("bool") constants are extended according to the target's
        // boolean-contents convention rather than always sign-extended.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
                                      : ISD::SIGN_EXTEND;
        int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
                                                    : C->getSExtValue();
        Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
                                            SDLoc(C), MVT::i64));
        return;
      } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
                 ConstraintLetter != 'n') {
        // Block addresses are relocatable, so 'i'/'s' accept them too.
        Ops.push_back(DAG.getTargetBlockAddress(
            BA->getBlockAddress(), BA->getValueType(0),
            Offset + BA->getOffset(), BA->getTargetFlags()));
        return;
      } else {
        // Not a symbol/constant leaf: peel one ADD/SUB-with-constant layer,
        // fold the constant into Offset, and keep walking the other operand.
        const unsigned OpCode = Op.getOpcode();
        if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
          if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
            Op = Op.getOperand(1);
          // Subtraction is not commutative.
          else if (OpCode == ISD::ADD &&
                   (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
            Op = Op.getOperand(0);
          else
            return;
          Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
          continue;
        }
      }
      // Unrecognized shape: leave Ops empty to signal failure.
      return;
    }
    break;
  }
  }
}
4384 
4385 std::pair<unsigned, const TargetRegisterClass *>
4386 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
4387                                              StringRef Constraint,
4388                                              MVT VT) const {
4389   if (Constraint.empty() || Constraint[0] != '{')
4390     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
4391   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
4392 
4393   // Remove the braces from around the name.
4394   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
4395 
4396   std::pair<unsigned, const TargetRegisterClass *> R =
4397       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
4398 
4399   // Figure out which register class contains this reg.
4400   for (const TargetRegisterClass *RC : RI->regclasses()) {
4401     // If none of the value types for this register class are valid, we
4402     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
4403     if (!isLegalRC(*RI, *RC))
4404       continue;
4405 
4406     for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
4407          I != E; ++I) {
4408       if (RegName.equals_lower(RI->getRegAsmName(*I))) {
4409         std::pair<unsigned, const TargetRegisterClass *> S =
4410             std::make_pair(*I, RC);
4411 
4412         // If this register class has the requested value type, return it,
4413         // otherwise keep searching and return the first class found
4414         // if no other is found which explicitly has the requested type.
4415         if (RI->isTypeLegalForClass(*RC, VT))
4416           return S;
4417         if (!R.second)
4418           R = S;
4419       }
4420     }
4421   }
4422 
4423   return R;
4424 }
4425 
4426 //===----------------------------------------------------------------------===//
4427 // Constraint Selection.
4428 
4429 /// Return true of this is an input operand that is a matching constraint like
4430 /// "4".
4431 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
4432   assert(!ConstraintCode.empty() && "No known constraint!");
4433   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
4434 }
4435 
4436 /// If this is an input matching constraint, this method returns the output
4437 /// operand it matches.
4438 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
4439   assert(!ConstraintCode.empty() && "No known constraint!");
4440   return atoi(ConstraintCode.c_str());
4441 }
4442 
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
///
/// The work is done in three phases: (1) a prepass that canonicalizes each
/// constraint and computes its value type, (2) for multi-alternative
/// constraints ("r,m,..."), selection of the best alternative across all
/// operands, and (3) validation of tied (matching) operands.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple direct outputs come back as a struct; the ResNo'th
        // element corresponds to this output.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType());
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands are accessed through a pointer; the constraint
        // actually applies to the pointee type.
        llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        // Pointers are modeled as integers of pointer width for the
        // pointer's address space.
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // Any unmatchable operand disqualifies this whole alternative.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
           cIndex != eIndex; ++cIndex) {
        AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
        if (cInfo.Type == InlineAsm::isClobber)
          continue;
        cInfo.selectAlternative(bestMAIndex);
      }
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        // Differing VTs are still acceptable when both constraints resolve
        // to the same register class and agree on integer-ness.
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
4631 
4632 /// Return an integer indicating how general CT is.
4633 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
4634   switch (CT) {
4635   case TargetLowering::C_Immediate:
4636   case TargetLowering::C_Other:
4637   case TargetLowering::C_Unknown:
4638     return 0;
4639   case TargetLowering::C_Register:
4640     return 1;
4641   case TargetLowering::C_RegisterClass:
4642     return 2;
4643   case TargetLowering::C_Memory:
4644     return 3;
4645   }
4646   llvm_unreachable("Invalid constraint type");
4647 }
4648 
4649 /// Examine constraint type and operand type and determine a weight value.
4650 /// This object must already have been set up with the operand type
4651 /// and the current alternative constraint selected.
4652 TargetLowering::ConstraintWeight
4653   TargetLowering::getMultipleConstraintMatchWeight(
4654     AsmOperandInfo &info, int maIndex) const {
4655   InlineAsm::ConstraintCodeVector *rCodes;
4656   if (maIndex >= (int)info.multipleAlternatives.size())
4657     rCodes = &info.Codes;
4658   else
4659     rCodes = &info.multipleAlternatives[maIndex].Codes;
4660   ConstraintWeight BestWeight = CW_Invalid;
4661 
4662   // Loop over the options, keeping track of the most general one.
4663   for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
4664     ConstraintWeight weight =
4665       getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
4666     if (weight > BestWeight)
4667       BestWeight = weight;
4668   }
4669 
4670   return BestWeight;
4671 }
4672 
4673 /// Examine constraint type and operand type and determine a weight value.
4674 /// This object must already have been set up with the operand type
4675 /// and the current alternative constraint selected.
4676 TargetLowering::ConstraintWeight
4677   TargetLowering::getSingleConstraintMatchWeight(
4678     AsmOperandInfo &info, const char *constraint) const {
4679   ConstraintWeight weight = CW_Invalid;
4680   Value *CallOperandVal = info.CallOperandVal;
4681     // If we don't have a value, we can't do a match,
4682     // but allow it at the lowest weight.
4683   if (!CallOperandVal)
4684     return CW_Default;
4685   // Look at the constraint type.
4686   switch (*constraint) {
4687     case 'i': // immediate integer.
4688     case 'n': // immediate integer with a known value.
4689       if (isa<ConstantInt>(CallOperandVal))
4690         weight = CW_Constant;
4691       break;
4692     case 's': // non-explicit intregal immediate.
4693       if (isa<GlobalValue>(CallOperandVal))
4694         weight = CW_Constant;
4695       break;
4696     case 'E': // immediate float if host format.
4697     case 'F': // immediate float.
4698       if (isa<ConstantFP>(CallOperandVal))
4699         weight = CW_Constant;
4700       break;
4701     case '<': // memory operand with autodecrement.
4702     case '>': // memory operand with autoincrement.
4703     case 'm': // memory operand.
4704     case 'o': // offsettable memory operand
4705     case 'V': // non-offsettable memory operand
4706       weight = CW_Memory;
4707       break;
4708     case 'r': // general register.
4709     case 'g': // general register, memory operand or immediate integer.
4710               // note: Clang converts "g" to "imr".
4711       if (CallOperandVal->getType()->isIntegerTy())
4712         weight = CW_Register;
4713       break;
4714     case 'X': // any operand.
4715   default:
4716     weight = CW_Default;
4717     break;
4718   }
4719   return weight;
4720 }
4721 
4722 /// If there are multiple different constraints that we could pick for this
4723 /// operand (e.g. "imr") try to pick the 'best' one.
4724 /// This is somewhat tricky: constraints fall into four classes:
4725 ///    Other         -> immediates and magic values
4726 ///    Register      -> one specific register
4727 ///    RegisterClass -> a group of regs
4728 ///    Memory        -> memory
4729 /// Ideally, we would pick the most specific constraint possible: if we have
4730 /// something that fits into a register, we would pick it.  The problem here
4731 /// is that if we have something that could either be in a register or in
4732 /// memory that use of the register could cause selection of *other*
4733 /// operands to fail: they might only succeed if we pick memory.  Because of
4734 /// this the heuristic we use is:
4735 ///
4736 ///  1) If there is an 'other' constraint, and if the operand is valid for
4737 ///     that constraint, use it.  This makes us take advantage of 'i'
4738 ///     constraints when available.
4739 ///  2) Otherwise, pick the most general constraint present.  This prefers
4740 ///     'm' over 'r', for example.
4741 ///
4742 static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
4743                              const TargetLowering &TLI,
4744                              SDValue Op, SelectionDAG *DAG) {
4745   assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
4746   unsigned BestIdx = 0;
4747   TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
4748   int BestGenerality = -1;
4749 
4750   // Loop over the options, keeping track of the most general one.
4751   for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
4752     TargetLowering::ConstraintType CType =
4753       TLI.getConstraintType(OpInfo.Codes[i]);
4754 
4755     // Indirect 'other' or 'immediate' constraints are not allowed.
4756     if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
4757                                CType == TargetLowering::C_Register ||
4758                                CType == TargetLowering::C_RegisterClass))
4759       continue;
4760 
4761     // If this is an 'other' or 'immediate' constraint, see if the operand is
4762     // valid for it. For example, on X86 we might have an 'rI' constraint. If
4763     // the operand is an integer in the range [0..31] we want to use I (saving a
4764     // load of a register), otherwise we must use 'r'.
4765     if ((CType == TargetLowering::C_Other ||
4766          CType == TargetLowering::C_Immediate) && Op.getNode()) {
4767       assert(OpInfo.Codes[i].size() == 1 &&
4768              "Unhandled multi-letter 'other' constraint");
4769       std::vector<SDValue> ResultOps;
4770       TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
4771                                        ResultOps, *DAG);
4772       if (!ResultOps.empty()) {
4773         BestType = CType;
4774         BestIdx = i;
4775         break;
4776       }
4777     }
4778 
4779     // Things with matching constraints can only be registers, per gcc
4780     // documentation.  This mainly affects "g" constraints.
4781     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
4782       continue;
4783 
4784     // This constraint letter is more general than the previous one, use it.
4785     int Generality = getConstraintGenerality(CType);
4786     if (Generality > BestGenerality) {
4787       BestType = CType;
4788       BestIdx = i;
4789       BestGenerality = Generality;
4790     }
4791   }
4792 
4793   OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
4794   OpInfo.ConstraintType = BestType;
4795 }
4796 
4797 /// Determines the constraint code and constraint type to use for the specific
4798 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
4799 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
4800                                             SDValue Op,
4801                                             SelectionDAG *DAG) const {
4802   assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
4803 
4804   // Single-letter constraints ('r') are very common.
4805   if (OpInfo.Codes.size() == 1) {
4806     OpInfo.ConstraintCode = OpInfo.Codes[0];
4807     OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4808   } else {
4809     ChooseConstraint(OpInfo, *this, Op, DAG);
4810   }
4811 
4812   // 'X' matches anything.
4813   if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
4814     // Labels and constants are handled elsewhere ('X' is the only thing
4815     // that matches labels).  For Functions, the type here is the type of
4816     // the result, which is not what we want to look at; leave them alone.
4817     Value *v = OpInfo.CallOperandVal;
4818     if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
4819       OpInfo.CallOperandVal = v;
4820       return;
4821     }
4822 
4823     if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
4824       return;
4825 
4826     // Otherwise, try to resolve it to something we know about by looking at
4827     // the actual operand type.
4828     if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
4829       OpInfo.ConstraintCode = Repl;
4830       OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4831     }
4832   }
4833 }
4834 
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
///
/// An exact sdiv by an odd d equals a multiply by d's multiplicative inverse
/// mod 2^bitwidth; an even d is first reduced by an exact arithmetic shift.
/// Handles scalars and vectors (including non-splat divisors). Returns an
/// empty SDValue if any divisor element is zero or unmatchable.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;  // Set when any divisor element is even.
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per-element worker: derive the pre-shift amount and multiplicative
  // inverse for a single divisor. Fails (returns false) on zero.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      // Strip the power-of-two part; it is handled by the exact SRA below.
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Materialize the per-element amounts as splat-or-not operands.
  SDValue Shift, Factor;
  if (VT.isVector()) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else {
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
4895 
4896 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
4897                               SelectionDAG &DAG,
4898                               SmallVectorImpl<SDNode *> &Created) const {
4899   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4900   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4901   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
4902     return SDValue(N, 0); // Lower SDIV as SDIV
4903   return SDValue();
4904 }
4905 
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// The emitted sequence is: high-multiply by the magic factor, optionally
/// add/subtract the numerator, arithmetic shift right, then add back the
/// sign bit (masked per element). Returns an empty SDValue when the type is
/// illegal, a divisor element is zero, or no high-multiply is available.
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // Per-element worker: compute the magic multiplier, numerator add/sub
  // factor, post-shift amount and sign-fixup mask for one divisor. Fails
  // (returns false) on a zero divisor.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    APInt::ms magics = Divisor.magic();
    int NumeratorFactor = 0;
    int ShiftMask = -1;   // All-ones mask keeps the sign-bit fixup enabled.

    if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.m = 0;
      magics.s = 0;
      ShiftMask = 0;   // Zero mask disables the sign-bit fixup for this lane.
    } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Materialize the per-element amounts as scalar or build-vector operands.
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (VT.isVector()) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else {
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  SDValue Q;
  if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
                          : isOperationLegalOrCustom(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
  else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
                               : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
    // Fall back to SMUL_LOHI and take the high half (result #1).
    SDValue LoHi =
        DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
    Q = SDValue(LoHi.getNode(), 1);
  } else
    return SDValue(); // No mulhs or equivalent.
  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
5014 
5015 /// Given an ISD::UDIV node expressing a divide by constant,
5016 /// return a DAG expression to select that will generate the same value by
5017 /// multiplying by a magic number.
5018 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
5019 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
5020                                   bool IsAfterLegalization,
5021                                   SmallVectorImpl<SDNode *> &Created) const {
5022   SDLoc dl(N);
5023   EVT VT = N->getValueType(0);
5024   EVT SVT = VT.getScalarType();
5025   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5026   EVT ShSVT = ShVT.getScalarType();
5027   unsigned EltBits = VT.getScalarSizeInBits();
5028 
5029   // Check to see if we can do this.
5030   // FIXME: We should be more aggressive here.
5031   if (!isTypeLegal(VT))
5032     return SDValue();
5033 
5034   bool UseNPQ = false;
5035   SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5036 
5037   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
5038     if (C->isNullValue())
5039       return false;
5040     // FIXME: We should use a narrower constant when the upper
5041     // bits are known to be zero.
5042     APInt Divisor = C->getAPIntValue();
5043     APInt::mu magics = Divisor.magicu();
5044     unsigned PreShift = 0, PostShift = 0;
5045 
5046     // If the divisor is even, we can avoid using the expensive fixup by
5047     // shifting the divided value upfront.
5048     if (magics.a != 0 && !Divisor[0]) {
5049       PreShift = Divisor.countTrailingZeros();
5050       // Get magic number for the shifted divisor.
5051       magics = Divisor.lshr(PreShift).magicu(PreShift);
5052       assert(magics.a == 0 && "Should use cheap fixup now");
5053     }
5054 
5055     APInt Magic = magics.m;
5056 
5057     unsigned SelNPQ;
5058     if (magics.a == 0 || Divisor.isOneValue()) {
5059       assert(magics.s < Divisor.getBitWidth() &&
5060              "We shouldn't generate an undefined shift!");
5061       PostShift = magics.s;
5062       SelNPQ = false;
5063     } else {
5064       PostShift = magics.s - 1;
5065       SelNPQ = true;
5066     }
5067 
5068     PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
5069     MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
5070     NPQFactors.push_back(
5071         DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5072                                : APInt::getNullValue(EltBits),
5073                         dl, SVT));
5074     PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
5075     UseNPQ |= SelNPQ;
5076     return true;
5077   };
5078 
5079   SDValue N0 = N->getOperand(0);
5080   SDValue N1 = N->getOperand(1);
5081 
5082   // Collect the shifts/magic values from each element.
5083   if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
5084     return SDValue();
5085 
5086   SDValue PreShift, PostShift, MagicFactor, NPQFactor;
5087   if (VT.isVector()) {
5088     PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
5089     MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
5090     NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
5091     PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
5092   } else {
5093     PreShift = PreShifts[0];
5094     MagicFactor = MagicFactors[0];
5095     PostShift = PostShifts[0];
5096   }
5097 
5098   SDValue Q = N0;
5099   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
5100   Created.push_back(Q.getNode());
5101 
5102   // FIXME: We should support doing a MUL in a wider type.
5103   auto GetMULHU = [&](SDValue X, SDValue Y) {
5104     if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
5105                             : isOperationLegalOrCustom(ISD::MULHU, VT))
5106       return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
5107     if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
5108                             : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
5109       SDValue LoHi =
5110           DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
5111       return SDValue(LoHi.getNode(), 1);
5112     }
5113     return SDValue(); // No mulhu or equivalent
5114   };
5115 
5116   // Multiply the numerator (operand 0) by the magic value.
5117   Q = GetMULHU(Q, MagicFactor);
5118   if (!Q)
5119     return SDValue();
5120 
5121   Created.push_back(Q.getNode());
5122 
5123   if (UseNPQ) {
5124     SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
5125     Created.push_back(NPQ.getNode());
5126 
5127     // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5128     // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
5129     if (VT.isVector())
5130       NPQ = GetMULHU(NPQ, NPQFactor);
5131     else
5132       NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
5133 
5134     Created.push_back(NPQ.getNode());
5135 
5136     Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
5137     Created.push_back(Q.getNode());
5138   }
5139 
5140   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
5141   Created.push_back(Q.getNode());
5142 
5143   SDValue One = DAG.getConstant(1, dl, VT);
5144   SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
5145   return DAG.getSelect(dl, VT, IsOne, N0, Q);
5146 }
5147 
5148 /// If all values in Values that *don't* match the predicate are same 'splat'
5149 /// value, then replace all values with that splat value.
5150 /// Else, if AlternativeReplacement was provided, then replace all values that
5151 /// do match predicate with AlternativeReplacement value.
5152 static void
5153 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
5154                           std::function<bool(SDValue)> Predicate,
5155                           SDValue AlternativeReplacement = SDValue()) {
5156   SDValue Replacement;
5157   // Is there a value for which the Predicate does *NOT* match? What is it?
5158   auto SplatValue = llvm::find_if_not(Values, Predicate);
5159   if (SplatValue != Values.end()) {
5160     // Does Values consist only of SplatValue's and values matching Predicate?
5161     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
5162           return Value == *SplatValue || Predicate(Value);
5163         })) // Then we shall replace values matching predicate with SplatValue.
5164       Replacement = *SplatValue;
5165   }
5166   if (!Replacement) {
5167     // Oops, we did not find the "baseline" splat value.
5168     if (!AlternativeReplacement)
5169       return; // Nothing to do.
5170     // Let's replace with provided value then.
5171     Replacement = AlternativeReplacement;
5172   }
5173   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
5174 }
5175 
5176 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
5177 /// where the divisor is constant and the comparison target is zero,
5178 /// return a DAG expression that will generate the same comparison result
5179 /// using only multiplications, additions and shifts/rotations.
5180 /// Ref: "Hacker's Delight" 10-17.
5181 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
5182                                         SDValue CompTargetNode,
5183                                         ISD::CondCode Cond,
5184                                         DAGCombinerInfo &DCI,
5185                                         const SDLoc &DL) const {
5186   SmallVector<SDNode *, 5> Built;
5187   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5188                                          DCI, DL, Built)) {
5189     for (SDNode *N : Built)
5190       DCI.AddToWorklist(N);
5191     return Folded;
5192   }
5193 
5194   return SDValue();
5195 }
5196 
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  // Ref: "Hacker's Delight" 10-17.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // Per-lane facts accumulated by BuildUREMPattern below; they decide
  // whether the fold is profitable and which fixups are required.
  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  // Per-lane constants: P (multiplier), K (rotate amount), Q (compare bound).
  // NOTE(review): IAmts is never referenced in this function.
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;

  // Analyze a single lane: divisor CDiv compared against constant CCmp.
  // Returns false if the fold is impossible for that lane.
  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isNullValue())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isNullValue();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isNullValue())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K, with D0 odd.
    unsigned K = D.countTrailingZeros();
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOneValue();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(R))
      Q -= 1;

    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K amount to a bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, KVal, QVal;
  if (VT.isVector()) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // Rewrite a comparison against non-zero C as a comparison of (N - C)
  // against zero, so the zero-remainder machinery below applies.
  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!isOperationLegalOrCustom(ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    SDNodeFlags Flags;
    Flags.setExact(true);
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
    Created.push_back(Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
  Created.push_back(TautologicalInvertedChannels.getNode());

  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
                                              DL, SETCCVT, SETCCVT);
    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
                       Replacement, NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
                       TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
5409 
5410 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
5411 /// where the divisor is constant and the comparison target is zero,
5412 /// return a DAG expression that will generate the same comparison result
5413 /// using only multiplications, additions and shifts/rotations.
5414 /// Ref: "Hacker's Delight" 10-17.
5415 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
5416                                         SDValue CompTargetNode,
5417                                         ISD::CondCode Cond,
5418                                         DAGCombinerInfo &DCI,
5419                                         const SDLoc &DL) const {
5420   SmallVector<SDNode *, 7> Built;
5421   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5422                                          DCI, DL, Built)) {
5423     assert(Built.size() <= 7 && "Max size prediction failed.");
5424     for (SDNode *N : Built)
5425       DCI.AddToWorklist(N);
5426     return Folded;
5427   }
5428 
5429   return SDValue();
5430 }
5431 
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  // Ref: "Hacker's Delight" 10-17.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isNullValue())
    return SDValue();

  // Per-lane facts accumulated by BuildSREMPattern below; they decide
  // whether the fold is profitable and which fixups must be emitted.
  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  // Per-lane constants: P (multiplier), A (offset), K (rotate amount),
  // Q (compare bound).
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  // Analyze a single constant-divisor lane; returns false if the fold is
  // impossible for that lane.
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isNullValue())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`

    // INT_MIN stays negative even after negation; such lanes are handled by
    // the special fixup at the end of this function.
    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOneValue();
    AllDivisorsAreOnes &= D.isOneValue();

    // Decompose D into D0 * 2^K, with D0 odd.
    unsigned K = D.countTrailingZeros();
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOneValue();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOneValue()) {
      // Set P, A and K to a bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, AVal, KVal, QVal;
  if (VT.isVector()) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else {
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();

    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    SDNodeFlags Flags;
    Flags.setExact(true);
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // The fixup below needs SETEQ, AND, Cond and VSELECT to all be usable.
  if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isOperationLegalOrCustom(Cond, VT) ||
      !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended =
      DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);

  return Blended;
}
5672 
5673 bool TargetLowering::
5674 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
5675   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
5676     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
5677                                 "be a constant integer");
5678     return true;
5679   }
5680 
5681   return false;
5682 }
5683 
5684 SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
5685                                              bool LegalOps, bool OptForSize,
5686                                              NegatibleCost &Cost,
5687                                              unsigned Depth) const {
5688   // fneg is removable even if it has multiple uses.
5689   if (Op.getOpcode() == ISD::FNEG) {
5690     Cost = NegatibleCost::Cheaper;
5691     return Op.getOperand(0);
5692   }
5693 
5694   // Don't recurse exponentially.
5695   if (Depth > SelectionDAG::MaxRecursionDepth)
5696     return SDValue();
5697 
5698   // Pre-increment recursion depth for use in recursive calls.
5699   ++Depth;
5700   const SDNodeFlags Flags = Op->getFlags();
5701   const TargetOptions &Options = DAG.getTarget().Options;
5702   EVT VT = Op.getValueType();
5703   unsigned Opcode = Op.getOpcode();
5704 
5705   // Don't allow anything with multiple uses unless we know it is free.
5706   if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
5707     bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
5708                         isFPExtFree(VT, Op.getOperand(0).getValueType());
5709     if (!IsFreeExtend)
5710       return SDValue();
5711   }
5712 
5713   SDLoc DL(Op);
5714 
5715   switch (Opcode) {
5716   case ISD::ConstantFP: {
5717     // Don't invert constant FP values after legalization unless the target says
5718     // the negated constant is legal.
5719     bool IsOpLegal =
5720         isOperationLegal(ISD::ConstantFP, VT) ||
5721         isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
5722                      OptForSize);
5723 
5724     if (LegalOps && !IsOpLegal)
5725       break;
5726 
5727     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
5728     V.changeSign();
5729     SDValue CFP = DAG.getConstantFP(V, DL, VT);
5730 
5731     // If we already have the use of the negated floating constant, it is free
5732     // to negate it even it has multiple uses.
5733     if (!Op.hasOneUse() && CFP.use_empty())
5734       break;
5735     Cost = NegatibleCost::Neutral;
5736     return CFP;
5737   }
5738   case ISD::BUILD_VECTOR: {
5739     // Only permit BUILD_VECTOR of constants.
5740     if (llvm::any_of(Op->op_values(), [&](SDValue N) {
5741           return !N.isUndef() && !isa<ConstantFPSDNode>(N);
5742         }))
5743       break;
5744 
5745     bool IsOpLegal =
5746         (isOperationLegal(ISD::ConstantFP, VT) &&
5747          isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
5748         llvm::all_of(Op->op_values(), [&](SDValue N) {
5749           return N.isUndef() ||
5750                  isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
5751                               OptForSize);
5752         });
5753 
5754     if (LegalOps && !IsOpLegal)
5755       break;
5756 
5757     SmallVector<SDValue, 4> Ops;
5758     for (SDValue C : Op->op_values()) {
5759       if (C.isUndef()) {
5760         Ops.push_back(C);
5761         continue;
5762       }
5763       APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
5764       V.changeSign();
5765       Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
5766     }
5767     Cost = NegatibleCost::Neutral;
5768     return DAG.getBuildVector(VT, DL, Ops);
5769   }
5770   case ISD::FADD: {
5771     if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
5772       break;
5773 
5774     // After operation legalization, it might not be legal to create new FSUBs.
5775     if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
5776       break;
5777     SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
5778 
5779     // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
5780     NegatibleCost CostX = NegatibleCost::Expensive;
5781     SDValue NegX =
5782         getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
5783     // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
5784     NegatibleCost CostY = NegatibleCost::Expensive;
5785     SDValue NegY =
5786         getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
5787 
5788     // Negate the X if its cost is less or equal than Y.
5789     if (NegX && (CostX <= CostY)) {
5790       Cost = CostX;
5791       return DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
5792     }
5793 
5794     // Negate the Y if it is not expensive.
5795     if (NegY) {
5796       Cost = CostY;
5797       return DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
5798     }
5799     break;
5800   }
5801   case ISD::FSUB: {
5802     // We can't turn -(A-B) into B-A when we honor signed zeros.
5803     if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
5804       break;
5805 
5806     SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
5807     // fold (fneg (fsub 0, Y)) -> Y
5808     if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
5809       if (C->isZero()) {
5810         Cost = NegatibleCost::Cheaper;
5811         return Y;
5812       }
5813 
5814     // fold (fneg (fsub X, Y)) -> (fsub Y, X)
5815     Cost = NegatibleCost::Neutral;
5816     return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
5817   }
5818   case ISD::FMUL:
5819   case ISD::FDIV: {
5820     SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
5821 
5822     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
5823     NegatibleCost CostX = NegatibleCost::Expensive;
5824     SDValue NegX =
5825         getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
5826     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
5827     NegatibleCost CostY = NegatibleCost::Expensive;
5828     SDValue NegY =
5829         getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
5830 
5831     // Negate the X if its cost is less or equal than Y.
5832     if (NegX && (CostX <= CostY)) {
5833       Cost = CostX;
5834       return DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
5835     }
5836 
5837     // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
5838     if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
5839       if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
5840         break;
5841 
5842     // Negate the Y if it is not expensive.
5843     if (NegY) {
5844       Cost = CostY;
5845       return DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
5846     }
5847     break;
5848   }
5849   case ISD::FMA:
5850   case ISD::FMAD: {
5851     if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
5852       break;
5853 
5854     SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
5855     NegatibleCost CostZ = NegatibleCost::Expensive;
5856     SDValue NegZ =
5857         getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
5858     // Give up if fail to negate the Z.
5859     if (!NegZ)
5860       break;
5861 
5862     // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
5863     NegatibleCost CostX = NegatibleCost::Expensive;
5864     SDValue NegX =
5865         getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
5866     // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
5867     NegatibleCost CostY = NegatibleCost::Expensive;
5868     SDValue NegY =
5869         getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
5870 
5871     // Negate the X if its cost is less or equal than Y.
5872     if (NegX && (CostX <= CostY)) {
5873       Cost = std::min(CostX, CostZ);
5874       return DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
5875     }
5876 
5877     // Negate the Y if it is not expensive.
5878     if (NegY) {
5879       Cost = std::min(CostY, CostZ);
5880       return DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
5881     }
5882     break;
5883   }
5884 
5885   case ISD::FP_EXTEND:
5886   case ISD::FSIN:
5887     if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
5888                                             OptForSize, Cost, Depth))
5889       return DAG.getNode(Opcode, DL, VT, NegV);
5890     break;
5891   case ISD::FP_ROUND:
5892     if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
5893                                             OptForSize, Cost, Depth))
5894       return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
5895     break;
5896   }
5897 
5898   return SDValue();
5899 }
5900 
5901 //===----------------------------------------------------------------------===//
5902 // Legalization Utilities
5903 //===----------------------------------------------------------------------===//
5904 
/// Expand a MUL, UMUL_LOHI, or SMUL_LOHI of type \p VT into operations on the
/// narrower type \p HiLoVT, appending the produced values to \p Result (Lo
/// then Hi, and for the *MUL_LOHI opcodes the upper Lo/Hi pair as well).
/// \p LL/\p LH/\p RL/\p RH may optionally carry pre-split halves of the two
/// operands; they must be either all null or all non-null.
/// Returns false if no suitable half-width multiply is available or the
/// operand halves cannot be formed.
/// NOTE(review): the shift/merge logic appears to assume HiLoVT is exactly
/// half the width of VT — confirm against callers.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Which half-width multiply flavors may we emit?  With
  // MulExpansionKind::Always we use them even if not legal/custom.
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  // Without any usable half-width multiply there is nothing we can do.
  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
  // Sign-bit counts let us detect operands that are effectively sign-extended
  // from the narrower type.
  unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
  unsigned RHSSB = DAG.ComputeNumSignBits(RHS);

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Produce the Lo/Hi results of one half-width multiply of L and R, using
  // [SU]MUL_LOHI if available, otherwise MUL plus MULH[SU].  Returns false if
  // neither form is usable for the requested signedness.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // If the caller did not supply pre-split halves, derive the low halves by
  // truncation.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  // Fast path: if the upper halves of both operands are known zero, a single
  // unsigned half-width multiply yields the full result.
  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  // Fast path: both operands fit in the narrow type when viewed as signed
  // values, so a single signed half-width multiply suffices.
  if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
      RHSSB > InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
    // FIXME getShiftAmountTy does not always return a sensible result when VT
    // is an illegal type, and so the type may be too small to fit the shift
    // amount. Override it with i32. The shift will have to be legalized.
    ShiftAmountTy = MVT::i32;
  }
  SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);

  // Derive the high halves (if not supplied) by shift + truncate.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // Schoolbook multiplication: start with the LL*RL partial product.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  // For a plain MUL we only need the low VT-wide product: fold the two cross
  // products into the high half and we are done.
  if (Opcode == ISD::MUL) {
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Adding the second cross product may carry; use ADDC/ADDE when the target
  // supports glue-based carry, otherwise ADDCARRY with a boolean carry.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // Final partial product LH*RH; signed only for SMUL_LOHI.
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  // Propagate the carry from the previous addition into the high product.
  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  // For the signed variant, correct the unsigned partial products: when a
  // high half is negative, the corresponding low half was effectively
  // over-counted and must be subtracted back out.
  if (Opcode == ISD::SMUL_LOHI) {
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  // Emit the remaining two half-width pieces of the double-width result.
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
6087 
6088 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
6089                                SelectionDAG &DAG, MulExpansionKind Kind,
6090                                SDValue LL, SDValue LH, SDValue RL,
6091                                SDValue RH) const {
6092   SmallVector<SDValue, 2> Result;
6093   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
6094                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
6095                            DAG, Kind, LL, LH, RL, RH);
6096   if (Ok) {
6097     assert(Result.size() == 2);
6098     Lo = Result[0];
6099     Hi = Result[1];
6100   }
6101   return Ok;
6102 }
6103 
6104 bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
6105                                        SelectionDAG &DAG) const {
6106   EVT VT = Node->getValueType(0);
6107 
6108   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
6109                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
6110                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
6111                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
6112     return false;
6113 
6114   // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6115   // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
6116   SDValue X = Node->getOperand(0);
6117   SDValue Y = Node->getOperand(1);
6118   SDValue Z = Node->getOperand(2);
6119 
6120   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6121   bool IsFSHL = Node->getOpcode() == ISD::FSHL;
6122   SDLoc DL(SDValue(Node, 0));
6123 
6124   EVT ShVT = Z.getValueType();
6125   SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
6126   SDValue ShAmt, InvShAmt;
6127   if (isPowerOf2_32(EltSizeInBits)) {
6128     // Z % BW -> Z & (BW - 1)
6129     ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
6130     // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6131     InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
6132   } else {
6133     SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
6134     ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
6135     InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
6136   }
6137 
6138   SDValue One = DAG.getConstant(1, DL, ShVT);
6139   SDValue ShX, ShY;
6140   if (IsFSHL) {
6141     ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
6142     SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
6143     ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
6144   } else {
6145     SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
6146     ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
6147     ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
6148   }
6149   Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
6150   return true;
6151 }
6152 
6153 // TODO: Merge with expandFunnelShift.
6154 bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
6155                                SelectionDAG &DAG) const {
6156   EVT VT = Node->getValueType(0);
6157   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6158   bool IsLeft = Node->getOpcode() == ISD::ROTL;
6159   SDValue Op0 = Node->getOperand(0);
6160   SDValue Op1 = Node->getOperand(1);
6161   SDLoc DL(SDValue(Node, 0));
6162 
6163   EVT ShVT = Op1.getValueType();
6164   SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
6165 
6166   // If a rotate in the other direction is legal, use it.
6167   unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
6168   if (isOperationLegal(RevRot, VT)) {
6169     SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
6170     Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
6171     return true;
6172   }
6173 
6174   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
6175                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
6176                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
6177                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
6178                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
6179     return false;
6180 
6181   // Otherwise,
6182   //   (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
6183   //   (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
6184   //
6185   assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
6186          "Expecting the type bitwidth to be a power of 2");
6187   unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
6188   unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
6189   SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
6190   SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
6191   SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
6192   SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
6193   Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
6194                        DAG.getNode(HsOpc, DL, VT, Op0, And1));
6195   return true;
6196 }
6197 
/// Expand FP_TO_SINT by decoding the IEEE-754 bit pattern directly with
/// integer operations.  Currently only handles f32 -> i64; returns false
/// (caller must use another lowering) otherwise, and always for strict FP
/// nodes, since this expansion would drop the mandated NaN trap.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  // Strict FP nodes carry a chain in operand 0; the value comes after it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field masks: 8 exponent bits starting at bit
  // 23, 23 mantissa bits, exponent bias 127.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as raw bits.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Extract the unbiased exponent.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Arithmetic-shift the sign bit down to get all-ones (negative) or
  // all-zeros (non-negative).
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Recover the significand by or-ing in the implicit leading 1 bit.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand by the exponent: shift left when the exponent
  // exceeds the mantissa width, right otherwise.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |value| < 1, so the result is 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
6268 
/// Expand FP_TO_UINT in terms of FP_TO_SINT.  On success sets \p Result (and
/// \p Chain for strict FP nodes) and returns true.  The strict-FP and
/// shouldUseStrictFP_TO_INT paths use a subtract-then-xor form that keeps the
/// FP_TO_SINT input in representable range; the relaxed path computes both
/// candidate conversions and selects between them.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // Strict FP nodes carry a chain in operand 0; the value comes after it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Cst is the FP value of the destination signmask; values below it fit in
  // the signed range directly.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    // Strict compare must signal on NaN and thread the chain through.
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    // Xor-ing the sign mask back in undoes the offset for large values.
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    // Xor with the sign mask is equivalent to adding it back here.
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
6365 
/// Expand UINT_TO_FP using integer bit manipulation (no int-to-fp
/// instruction needed).  Only handles i64 -> f64 (scalar or per-element);
/// returns false otherwise.  On success sets \p Result (and \p Chain for
/// strict FP nodes).
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  // Strict FP nodes carry a chain in operand 0; the value comes after it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt. This implementation has the advantage
  // of performing rounding correctly, both in the default rounding mode
  // and in all alternate rounding modes.
  // The trick: or-ing the 32-bit halves of the input into the mantissa of
  // 2^52 / 2^84 produces exact doubles whose (adjusted) sum is the result.
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

  // Split the input into its low and high 32-bit halves.
  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  // Embed each half in a double's mantissa via bit-or + bitcast.
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  // Subtract the embedded biases, then add the two halves; the final add is
  // the only operation that can round.
  if (Node->isStrictFPOpcode()) {
    SDValue HiSub =
        DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
                    {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
    Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
                         {HiSub.getValue(1), LoFlt, HiSub});
    Chain = Result.getValue(1);
  } else {
    SDValue HiSub =
        DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
    Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  }
  return true;
}
6419 
/// Expand FMINNUM/FMAXNUM by trying, in order: the IEEE variant (with sNaN
/// quieting), the FMINIMUM/FMAXIMUM 2018 ops (no-NaNs only), and finally a
/// compare+select sequence (no-NaNs only).  Returns a null SDValue if none of
/// these are possible, in which case the caller falls back to a libcall.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
    ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
  EVT VT = Node->getValueType(0);
  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs.
  if (Node->getFlags().hasNoNaNs()) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
    }
  }

  // If none of the above worked, but there are no NaNs, then expand to
  // a compare/select sequence.  This is required for correctness since
  // InstCombine might have canonicalized a fcmp+select sequence to a
  // FMINNUM/FMAXNUM node.  If we were to fall through to the default
  // expansion to libcall, we might introduce a link-time dependency
  // on libm into a file that originally did not have one.
  if (Node->getFlags().hasNoNaNs()) {
    ISD::CondCode Pred =
        Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
    SDValue Op1 = Node->getOperand(0);
    SDValue Op2 = Node->getOperand(1);
    SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
    // Copy FMF flags, but always set the no-signed-zeros flag
    // as this is implied by the FMINNUM/FMAXNUM semantics.
    SDNodeFlags Flags = Node->getFlags();
    Flags.setNoSignedZeros(true);
    SelCC->setFlags(Flags);
    return SelCC;
  }

  return SDValue();
}
6479 
/// Expand CTPOP into shifts, adds, and masks (the classic parallel bit-count
/// algorithm).  Supports integer types up to 128 bits whose width is a
/// multiple of 8; on success sets \p Result and returns true.
bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
                                 SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  // MUL is only needed for the final byte-sum step, which Len == 8 skips.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return false;

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  // The masks below are byte patterns replicated across the whole width.
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
  SDValue Mask01 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);
  // v = (v * 0x01010101...) >> (Len - 8)
  // The multiply sums the per-byte counts into the top byte; skip it when
  // the type is a single byte wide.
  if (Len > 8)
    Op =
        DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
                    DAG.getConstant(Len - 8, dl, ShVT));

  Result = Op;
  return true;
}
6539 
6540 bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
6541                                 SelectionDAG &DAG) const {
6542   SDLoc dl(Node);
6543   EVT VT = Node->getValueType(0);
6544   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6545   SDValue Op = Node->getOperand(0);
6546   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6547 
6548   // If the non-ZERO_UNDEF version is supported we can use that instead.
6549   if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
6550       isOperationLegalOrCustom(ISD::CTLZ, VT)) {
6551     Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
6552     return true;
6553   }
6554 
6555   // If the ZERO_UNDEF version is supported use that and handle the zero case.
6556   if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
6557     EVT SetCCVT =
6558         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6559     SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
6560     SDValue Zero = DAG.getConstant(0, dl, VT);
6561     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6562     Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6563                          DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
6564     return true;
6565   }
6566 
6567   // Only expand vector types if we have the appropriate vector bit operations.
6568   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6569                         !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
6570                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
6571                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
6572     return false;
6573 
6574   // for now, we do this:
6575   // x = x | (x >> 1);
6576   // x = x | (x >> 2);
6577   // ...
6578   // x = x | (x >>16);
6579   // x = x | (x >>32); // for 64-bit input
6580   // return popcount(~x);
6581   //
6582   // Ref: "Hacker's Delight" by Henry Warren
6583   for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
6584     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
6585     Op = DAG.getNode(ISD::OR, dl, VT, Op,
6586                      DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
6587   }
6588   Op = DAG.getNOT(dl, Op, VT);
6589   Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
6590   return true;
6591 }
6592 
6593 bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
6594                                 SelectionDAG &DAG) const {
6595   SDLoc dl(Node);
6596   EVT VT = Node->getValueType(0);
6597   SDValue Op = Node->getOperand(0);
6598   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6599 
6600   // If the non-ZERO_UNDEF version is supported we can use that instead.
6601   if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
6602       isOperationLegalOrCustom(ISD::CTTZ, VT)) {
6603     Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
6604     return true;
6605   }
6606 
6607   // If the ZERO_UNDEF version is supported use that and handle the zero case.
6608   if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
6609     EVT SetCCVT =
6610         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6611     SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
6612     SDValue Zero = DAG.getConstant(0, dl, VT);
6613     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6614     Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6615                          DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
6616     return true;
6617   }
6618 
6619   // Only expand vector types if we have the appropriate vector bit operations.
6620   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6621                         (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
6622                          !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
6623                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
6624                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
6625                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6626     return false;
6627 
6628   // for now, we use: { return popcount(~x & (x - 1)); }
6629   // unless the target has ctlz but not ctpop, in which case we use:
6630   // { return 32 - nlz(~x & (x-1)); }
6631   // Ref: "Hacker's Delight" by Henry Warren
6632   SDValue Tmp = DAG.getNode(
6633       ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
6634       DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
6635 
6636   // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
6637   if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
6638     Result =
6639         DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
6640                     DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
6641     return true;
6642   }
6643 
6644   Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
6645   return true;
6646 }
6647 
6648 bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
6649                                SelectionDAG &DAG) const {
6650   SDLoc dl(N);
6651   EVT VT = N->getValueType(0);
6652   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6653   SDValue Op = N->getOperand(0);
6654 
6655   // Only expand vector types if we have the appropriate vector operations.
6656   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
6657                         !isOperationLegalOrCustom(ISD::ADD, VT) ||
6658                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6659     return false;
6660 
6661   SDValue Shift =
6662       DAG.getNode(ISD::SRA, dl, VT, Op,
6663                   DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
6664   SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
6665   Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
6666   return true;
6667 }
6668 
/// Turn the vector load LD into an equivalent sequence of scalar operations.
///
/// Two strategies are used:
///  - Byte-sized elements: one scalar (ext)load per element; the per-element
///    chains are merged with a TokenFactor.
///  - Non-byte-sized elements: a single integer load of the whole vector,
///    with each element extracted by shift + mask (see the comment below on
///    why vectors may not be padded in memory).
///
/// Returns the pair (scalarized value, new output chain).
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    // Mask isolating one element's bits after it has been shifted down.
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getAlignment(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 occupies the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
                                     LoadVT, SL, /*LegalTypes=*/false);
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      // Re-apply the requested extension (sext/zext/aext) per element.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    // A single wide load was emitted, so its chain value is the new chain.
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: load each element individually.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // The loads are independent of each other; merge their chains.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
6756 
/// Turn the vector store ST into an equivalent sequence of scalar operations.
///
/// Two strategies are used:
///  - Byte-sized elements in memory: one (possibly truncating) scalar store
///    per element; the store chains are merged with a TokenFactor.
///  - Non-byte-sized elements: the elements are packed into one integer with
///    shift + or and stored with a single integer store (see the comment
///    below on why vectors may not be padded in memory).
///
/// Returns the new store chain.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    // Accumulator for the packed integer; elements are OR'ed in below.
    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      // Truncate to the in-memory element width, then zero-extend so the
      // shift below positions exactly MemSclVT bits (no sign garbage).
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 occupies the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    // One integer store of the fully packed value.
    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getAlignment(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
        ST->getMemOperand()->getFlags(), ST->getAAInfo());

    Stores.push_back(Store);
  }

  // The stores are independent of each other; merge their chains.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
6829 
/// Expand a load whose alignment is below what the target supports.
///
/// Three strategies, tried in order:
///  1. FP/vector loads where the same-width integer type is legal: do a
///     (misaligned) integer load and bitcast back.
///  2. Other FP/vector loads: copy the bytes to an aligned stack slot using
///     register-width integer loads/stores, then do the original load from
///     the slot.
///  3. Scalar integer loads: split into two half-width loads and combine
///     them with shift + or.
///
/// Returns the pair (loaded value, new output chain); callers expect a
/// MERGE_VALUES-shaped result.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up so a trailing partial register is copied too.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       MinAlign(LD->getAlignment(), Offset),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  unsigned Alignment = LD->getAlignment();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  The lo half is always zero-extended so it
  // can be OR'ed with the shifted hi half without clobbering its bits; the
  // hi half carries the original load's extension kind.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  }

  // aggregate the two parts: (Hi << NumBits) | Lo
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
6981 
/// Expand a store whose alignment is below what the target supports.
///
/// Three strategies, tried in order:
///  1. FP/vector stores where the same-width integer type is legal: bitcast
///     to integer and do a (misaligned) integer store.
///  2. Other FP/vector stores: do an aligned store to a stack slot, then
///     copy the bytes to the destination using register-width integer
///     loads/stores.
///  3. Scalar integer stores: split the value with a shift and store the two
///     halves separately.
///
/// Returns the new store chain.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  int Alignment = ST->getAlignment();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up so a trailing partial register is copied too.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    MinAlign(ST->getAlignment(), Offset),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          MinAlign(ST->getAlignment(), Offset),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  int NumBits = NewStoredVT.getSizeInBits();
  int IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount = DAG.getConstant(
      NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
  SDValue Lo = Val;
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts, swapping which half goes first on big-endian.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
  Alignment = MinAlign(Alignment, IncrementSize);
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
7107 
7108 SDValue
7109 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
7110                                        const SDLoc &DL, EVT DataVT,
7111                                        SelectionDAG &DAG,
7112                                        bool IsCompressedMemory) const {
7113   SDValue Increment;
7114   EVT AddrVT = Addr.getValueType();
7115   EVT MaskVT = Mask.getValueType();
7116   assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
7117          "Incompatible types of Data and Mask");
7118   if (IsCompressedMemory) {
7119     // Incrementing the pointer according to number of '1's in the mask.
7120     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
7121     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
7122     if (MaskIntVT.getSizeInBits() < 32) {
7123       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
7124       MaskIntVT = MVT::i32;
7125     }
7126 
7127     // Count '1's with POPCNT.
7128     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
7129     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
7130     // Scale is an element size in bytes.
7131     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
7132                                     AddrVT);
7133     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
7134   } else
7135     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
7136 
7137   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
7138 }
7139 
7140 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
7141                                        SDValue Idx,
7142                                        EVT VecVT,
7143                                        const SDLoc &dl) {
7144   if (isa<ConstantSDNode>(Idx))
7145     return Idx;
7146 
7147   EVT IdxVT = Idx.getValueType();
7148   unsigned NElts = VecVT.getVectorNumElements();
7149   if (isPowerOf2_32(NElts)) {
7150     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
7151                                      Log2_32(NElts));
7152     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
7153                        DAG.getConstant(Imm, dl, IdxVT));
7154   }
7155 
7156   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
7157                      DAG.getConstant(NElts - 1, dl, IdxVT));
7158 }
7159 
7160 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
7161                                                 SDValue VecPtr, EVT VecVT,
7162                                                 SDValue Index) const {
7163   SDLoc dl(Index);
7164   // Make sure the index type is big enough to compute in.
7165   Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
7166 
7167   EVT EltVT = VecVT.getVectorElementType();
7168 
7169   // Calculate the element offset and add it to the pointer.
7170   unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
7171   assert(EltSize * 8 == EltVT.getSizeInBits() &&
7172          "Converting bits to bytes lost precision");
7173 
7174   Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
7175 
7176   EVT IdxVT = Index.getValueType();
7177 
7178   Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
7179                       DAG.getConstant(EltSize, dl, IdxVT));
7180   return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
7181 }
7182 
7183 //===----------------------------------------------------------------------===//
7184 // Implementation of Emulated TLS Model
7185 //===----------------------------------------------------------------------===//
7186 
/// Lower a TLS global address under the emulated TLS model.
///
/// Access to the address of TLS variable xyz is lowered to a function call:
///   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
/// The "__emutls_v.xyz" control variable must already exist in the module
/// (created earlier by the emulated-TLS IR pass).
///
/// Returns the call's result, i.e. the address of the TLS variable.
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  // Single argument: the address of the "__emutls_v.<name>" control variable.
  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
7223 
7224 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
7225                                                 SelectionDAG &DAG) const {
7226   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
7227   if (!isCtlzFast())
7228     return SDValue();
7229   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7230   SDLoc dl(Op);
7231   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7232     if (C->isNullValue() && CC == ISD::SETEQ) {
7233       EVT VT = Op.getOperand(0).getValueType();
7234       SDValue Zext = Op.getOperand(0);
7235       if (VT.bitsLT(MVT::i32)) {
7236         VT = MVT::i32;
7237         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
7238       }
7239       unsigned Log2b = Log2_32(VT.getSizeInBits());
7240       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
7241       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
7242                                 DAG.getConstant(Log2b, dl, MVT::i32));
7243       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
7244     }
7245   }
7246   return SDValue();
7247 }
7248 
/// Expand [US](ADD|SUB)SAT into target-legal nodes: a umin/umax based form
/// when those operations are legal, otherwise an overflow-reporting add/sub
/// followed by a select (or logic op) that clamps to the saturation value.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b: the umin caps the addend so the add
  // cannot wrap, yielding all-ones exactly when it would have overflowed.
  if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Otherwise, use the matching overflow-reporting arithmetic node and fix
  // up the result when overflow is reported.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
                               LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // With all-ones booleans the overflow flag sign-extends directly into
      // a mask, so saturation is a single OR:
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  } else if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Same mask trick for the unsigned-subtract case:
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  } else {
    // Signed case: on overflow, saturate toward the side opposite the
    // wrapped result's sign.
    // SatMax -> Overflow && SumDiff < 0
    // SatMin -> Overflow && SumDiff >= 0
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
    SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
    SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
    SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
    Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
    return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
  }
}
7328 
/// Expand a fixed point multiplication ([US]MULFIX[SAT]): compute the
/// double-width product via [SU]MUL_LOHI or MULH[SU], shift it right by the
/// scale, and (for the saturating forms) clamp when the discarded high bits
/// show overflow. Returns SDValue() only for vectors with no usable wide
/// multiply; scalar failure is a fatal error.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // Saturating scale-0 signed multiply: use SMULO and clamp on overflow.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      // On overflow the wrapped product's sign is opposite the true result's,
      // so a negative wrapped product means we saturate to max.
      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // Saturating scale-0 unsigned multiply: overflow always clamps to max.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With no fractional bits the only bit to examine is the sign: overflow
    // iff Hi is not the sign-extension of Lo.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine are in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
7468 
/// Expand a fixed point division ([US]DIVFIX[SAT]) into a plain integer
/// division in the same type by pre-shifting the operands to account for the
/// scale. Returns SDValue() when the operands provably lack the headroom to
/// do this without widening; the caller must then legalize differently.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Consume as much of the scale as possible on the LHS (shifting left),
  // then take the remainder out of the RHS (shifting right).
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getConstant(LHSShift, dl, ShiftTy));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getConstant(RHSShift, dl, ShiftTy));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    // The quotient is negative exactly when the operand signs differ.
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
7555 
7556 void TargetLowering::expandUADDSUBO(
7557     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
7558   SDLoc dl(Node);
7559   SDValue LHS = Node->getOperand(0);
7560   SDValue RHS = Node->getOperand(1);
7561   bool IsAdd = Node->getOpcode() == ISD::UADDO;
7562 
7563   // If ADD/SUBCARRY is legal, use that instead.
7564   unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
7565   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
7566     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
7567     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
7568                                     { LHS, RHS, CarryIn });
7569     Result = SDValue(NodeCarry.getNode(), 0);
7570     Overflow = SDValue(NodeCarry.getNode(), 1);
7571     return;
7572   }
7573 
7574   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
7575                             LHS.getValueType(), LHS, RHS);
7576 
7577   EVT ResultType = Node->getValueType(1);
7578   EVT SetCCType = getSetCCResultType(
7579       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
7580   ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
7581   SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
7582   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
7583 }
7584 
7585 void TargetLowering::expandSADDSUBO(
7586     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
7587   SDLoc dl(Node);
7588   SDValue LHS = Node->getOperand(0);
7589   SDValue RHS = Node->getOperand(1);
7590   bool IsAdd = Node->getOpcode() == ISD::SADDO;
7591 
7592   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
7593                             LHS.getValueType(), LHS, RHS);
7594 
7595   EVT ResultType = Node->getValueType(1);
7596   EVT OType = getSetCCResultType(
7597       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
7598 
7599   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
7600   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
7601   if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
7602     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
7603     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
7604     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
7605     return;
7606   }
7607 
7608   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
7609 
7610   // For an addition, the result should be less than one of the operands (LHS)
7611   // if and only if the other operand (RHS) is negative, otherwise there will
7612   // be overflow.
7613   // For a subtraction, the result should be less than one of the operands
7614   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
7615   // otherwise there will be overflow.
7616   SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
7617   SDValue ConditionRHS =
7618       DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
7619 
7620   Overflow = DAG.getBoolExtOrTrunc(
7621       DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
7622       ResultType, ResultType);
7623 }
7624 
/// Expand [SU]MULO into a multiply plus an explicit overflow check. Tries, in
/// order: a shift-based form for power-of-two constants, MULH[SU],
/// [SU]MUL_LOHI, a widened MUL on a legal double-width type, and finally a
/// runtime multiply libcall on the double-width type. Returns false only when
/// no expansion is possible (vectors with none of the above available).
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting the product back down does not recover LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // Double-width type used when we must widen or make a libcall.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                              VT.getVectorNumElements());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Per-signedness opcode table: { high-half mul, lo/hi pair mul, extend }.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Widen, multiply, then split the product into halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of low
      // part.
      unsigned LoSize = VT.getSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
        HiLHS = DAG.getConstant(0, dl, VT);
        HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus has to be split to
    // two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half is not the sign-extension of the
    // bottom half.
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff the top half is nonzero.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
7768 
7769 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
7770   SDLoc dl(Node);
7771   bool NoNaN = Node->getFlags().hasNoNaNs();
7772   unsigned BaseOpcode = 0;
7773   switch (Node->getOpcode()) {
7774   default: llvm_unreachable("Expected VECREDUCE opcode");
7775   case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
7776   case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
7777   case ISD::VECREDUCE_ADD:  BaseOpcode = ISD::ADD; break;
7778   case ISD::VECREDUCE_MUL:  BaseOpcode = ISD::MUL; break;
7779   case ISD::VECREDUCE_AND:  BaseOpcode = ISD::AND; break;
7780   case ISD::VECREDUCE_OR:   BaseOpcode = ISD::OR; break;
7781   case ISD::VECREDUCE_XOR:  BaseOpcode = ISD::XOR; break;
7782   case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
7783   case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
7784   case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
7785   case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
7786   case ISD::VECREDUCE_FMAX:
7787     BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
7788     break;
7789   case ISD::VECREDUCE_FMIN:
7790     BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
7791     break;
7792   }
7793 
7794   SDValue Op = Node->getOperand(0);
7795   EVT VT = Op.getValueType();
7796 
7797   // Try to use a shuffle reduction for power of two vectors.
7798   if (VT.isPow2VectorType()) {
7799     while (VT.getVectorNumElements() > 1) {
7800       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
7801       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
7802         break;
7803 
7804       SDValue Lo, Hi;
7805       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
7806       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
7807       VT = HalfVT;
7808     }
7809   }
7810 
7811   EVT EltVT = VT.getVectorElementType();
7812   unsigned NumElts = VT.getVectorNumElements();
7813 
7814   SmallVector<SDValue, 8> Ops;
7815   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
7816 
7817   SDValue Res = Ops[0];
7818   for (unsigned i = 1; i < NumElts; i++)
7819     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
7820 
7821   // Result type may be wider than element type.
7822   if (EltVT != Node->getValueType(0))
7823     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
7824   return Res;
7825 }
7826 
7827 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
7828                                SelectionDAG &DAG) const {
7829   EVT VT = Node->getValueType(0);
7830   SDLoc dl(Node);
7831   bool isSigned = Node->getOpcode() == ISD::SREM;
7832   unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
7833   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
7834   SDValue Dividend = Node->getOperand(0);
7835   SDValue Divisor = Node->getOperand(1);
7836   if (isOperationLegalOrCustom(DivRemOpc, VT)) {
7837     SDVTList VTs = DAG.getVTList(VT, VT);
7838     Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
7839     return true;
7840   } else if (isOperationLegalOrCustom(DivOpc, VT)) {
7841     // X % Y -> X-X/Y*Y
7842     SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
7843     SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
7844     Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
7845     return true;
7846   }
7847   return false;
7848 }
7849