1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/CodeGen/CallingConvLower.h"
16 #include "llvm/CodeGen/MachineFrameInfo.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineJumpTableInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/SelectionDAG.h"
21 #include "llvm/CodeGen/TargetRegisterInfo.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalVariable.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCExpr.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/KnownBits.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetLoweringObjectFile.h"
33 #include "llvm/Target/TargetMachine.h"
34 #include <cctype>
35 using namespace llvm;
36 
/// Construct the lowering object for \p tm. All state lives in the
/// TargetLoweringBase subobject; this class only adds behavior.
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
40 
/// Return a human-readable name for the given target-specific opcode, or
/// null. The base implementation knows no target nodes; targets override it.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
44 
/// Whether code is being generated position-independent, as reported by the
/// owning TargetMachine.
bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
48 
49 /// Check whether a given call node is in tail position within its function. If
50 /// so, it sets Chain to the input chain of the tail call.
51 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
52                                           SDValue &Chain) const {
53   const Function &F = DAG.getMachineFunction().getFunction();
54 
55   // First, check if tail calls have been disabled in this function.
56   if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
57     return false;
58 
59   // Conservatively require the attributes of the call to match those of
60   // the return. Ignore NoAlias and NonNull because they don't affect the
61   // call sequence.
62   AttributeList CallerAttrs = F.getAttributes();
63   if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
64           .removeAttribute(Attribute::NoAlias)
65           .removeAttribute(Attribute::NonNull)
66           .hasAttributes())
67     return false;
68 
69   // It's not safe to eliminate the sign / zero extension of the return value.
70   if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
71       CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
72     return false;
73 
74   // Check if the only use is a function return node.
75   return isUsedByReturnOnly(Node, Chain);
76 }
77 
78 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
79     const uint32_t *CallerPreservedMask,
80     const SmallVectorImpl<CCValAssign> &ArgLocs,
81     const SmallVectorImpl<SDValue> &OutVals) const {
82   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
83     const CCValAssign &ArgLoc = ArgLocs[I];
84     if (!ArgLoc.isRegLoc())
85       continue;
86     MCRegister Reg = ArgLoc.getLocReg();
87     // Only look at callee saved registers.
88     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
89       continue;
90     // Check that we pass the value used for the caller.
91     // (We look for a CopyFromReg reading a virtual register that is used
92     //  for the function live-in value of register Reg)
93     SDValue Value = OutVals[I];
94     if (Value->getOpcode() != ISD::CopyFromReg)
95       return false;
96     MCRegister ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
97     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
98       return false;
99   }
100   return true;
101 }
102 
103 /// Set CallLoweringInfo attribute flags based on a call instruction
104 /// and called function attributes.
105 void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
106                                                      unsigned ArgIdx) {
107   IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
108   IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
109   IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
110   IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
111   IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
112   IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
113   IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
114   IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
115   IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
116   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
117   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
118   Alignment = Call->getParamAlign(ArgIdx);
119   ByValType = nullptr;
120   if (IsByVal)
121     ByValType = Call->getParamByValType(ArgIdx);
122   PreallocatedType = nullptr;
123   if (IsPreallocated)
124     PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
125 }
126 
127 /// Generate a libcall taking the given operands as arguments and returning a
128 /// result of type RetVT.
129 std::pair<SDValue, SDValue>
130 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
131                             ArrayRef<SDValue> Ops,
132                             MakeLibCallOptions CallOptions,
133                             const SDLoc &dl,
134                             SDValue InChain) const {
135   if (!InChain)
136     InChain = DAG.getEntryNode();
137 
138   TargetLowering::ArgListTy Args;
139   Args.reserve(Ops.size());
140 
141   TargetLowering::ArgListEntry Entry;
142   for (unsigned i = 0; i < Ops.size(); ++i) {
143     SDValue NewOp = Ops[i];
144     Entry.Node = NewOp;
145     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
146     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
147                                                  CallOptions.IsSExt);
148     Entry.IsZExt = !Entry.IsSExt;
149 
150     if (CallOptions.IsSoften &&
151         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
152       Entry.IsSExt = Entry.IsZExt = false;
153     }
154     Args.push_back(Entry);
155   }
156 
157   if (LC == RTLIB::UNKNOWN_LIBCALL)
158     report_fatal_error("Unsupported library call operation!");
159   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
160                                          getPointerTy(DAG.getDataLayout()));
161 
162   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
163   TargetLowering::CallLoweringInfo CLI(DAG);
164   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
165   bool zeroExtend = !signExtend;
166 
167   if (CallOptions.IsSoften &&
168       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
169     signExtend = zeroExtend = false;
170   }
171 
172   CLI.setDebugLoc(dl)
173       .setChain(InChain)
174       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
175       .setNoReturn(CallOptions.DoesNotReturn)
176       .setDiscardResult(!CallOptions.IsReturnValueUsed)
177       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
178       .setSExtResult(signExtend)
179       .setZExtResult(zeroExtend);
180   return LowerCallTo(CLI);
181 }
182 
/// Determine the sequence of value types to use when expanding the memory
/// operation described by \p Op, appending them to \p MemOps. Returns false
/// if more than \p Limit operations would be required, or if a fixed
/// destination alignment cannot be honored.
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // Reject a fixed-align memcpy whose source is less aligned than its
  // destination.
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type first; MVT::Other means "no
  // preference" and we fall back to the generic integer selection below.
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (
          Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
          !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign().value()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily cover the remaining bytes, shrinking VT for the tail pieces.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Walk down the integer types until a safe one is found; i8 is the
        // unconditional floor.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      // NOTE(review): Fast is only read when allowsMisalignedMemoryAccesses
      // returned true, which presumably sets it — confirm against the target
      // hook's contract.
      bool Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
272 
273 /// Soften the operands of a comparison. This code is shared among BR_CC,
274 /// SELECT_CC, and SETCC handlers.
275 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
276                                          SDValue &NewLHS, SDValue &NewRHS,
277                                          ISD::CondCode &CCCode,
278                                          const SDLoc &dl, const SDValue OldLHS,
279                                          const SDValue OldRHS) const {
280   SDValue Chain;
281   return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
282                              OldRHS, Chain);
283 }
284 
/// Expand a floating-point comparison into one or two soft-float libcalls,
/// rewriting NewLHS/NewRHS and CCCode into an equivalent integer comparison.
/// When two libcalls are needed, the combined result is left in NewLHS and
/// NewRHS is cleared. If \p Chain is non-null on entry it is threaded
/// through the generated calls and updated on exit.
/// NOTE(review): IsSignaling is not referenced in this implementation.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  // Ordered predicates map directly onto a single comparison libcall.
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of SETUO; share the UO libcall and invert.
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUEQ:
    // SETUEQ = UO || OEQ, so two libcalls are needed.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparisons lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  // The libcall result is compared against zero with the libcall's own CC.
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two libcalls: evaluate both comparisons and combine them with AND
    // (inverted case) or OR.
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}
424 
425 /// Return the entry encoding for a jump table in the current function. The
426 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
427 unsigned TargetLowering::getJumpTableEncoding() const {
428   // In non-pic modes, just use the address of a block.
429   if (!isPositionIndependent())
430     return MachineJumpTableInfo::EK_BlockAddress;
431 
432   // In PIC mode, if the target supports a GPRel32 directive, use it.
433   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
434     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
435 
436   // Otherwise, use a label difference.
437   return MachineJumpTableInfo::EK_LabelDifference32;
438 }
439 
440 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
441                                                  SelectionDAG &DAG) const {
442   // If our PIC model is GP relative, use the global offset table as the base.
443   unsigned JTEncoding = getJumpTableEncoding();
444 
445   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
446       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
447     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
448 
449   return Table;
450 }
451 
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr. \p JTI is the jump table
/// index within \p MF.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,MCContext &Ctx) const{
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
460 
461 bool
462 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
463   const TargetMachine &TM = getTargetMachine();
464   const GlobalValue *GV = GA->getGlobal();
465 
466   // If the address is not even local to this DSO we will have to load it from
467   // a got and then add the offset.
468   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
469     return false;
470 
471   // If the code is position independent we will have to add a base register.
472   if (isPositionIndependent())
473     return false;
474 
475   // Otherwise we can do it.
476   return true;
477 }
478 
479 //===----------------------------------------------------------------------===//
480 //  Optimization Methods
481 //===----------------------------------------------------------------------===//
482 
483 /// If the specified instruction has a constant integer operand and there are
484 /// bits set in that constant that are not demanded, then clear those bits and
485 /// return true.
486 bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
487                                             TargetLoweringOpt &TLO) const {
488   SDLoc DL(Op);
489   unsigned Opcode = Op.getOpcode();
490 
491   // Do target-specific constant optimization.
492   if (targetShrinkDemandedConstant(Op, Demanded, TLO))
493     return TLO.New.getNode();
494 
495   // FIXME: ISD::SELECT, ISD::SELECT_CC
496   switch (Opcode) {
497   default:
498     break;
499   case ISD::XOR:
500   case ISD::AND:
501   case ISD::OR: {
502     auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
503     if (!Op1C)
504       return false;
505 
506     // If this is a 'not' op, don't touch it because that's a canonical form.
507     const APInt &C = Op1C->getAPIntValue();
508     if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
509       return false;
510 
511     if (!C.isSubsetOf(Demanded)) {
512       EVT VT = Op.getValueType();
513       SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
514       SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
515       return TLO.CombineTo(Op, NewOp);
516     }
517 
518     break;
519   }
520   }
521 
522   return false;
523 }
524 
525 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
526 /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
527 /// generalized for targets with other types of implicit widening casts.
528 bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
529                                       const APInt &Demanded,
530                                       TargetLoweringOpt &TLO) const {
531   assert(Op.getNumOperands() == 2 &&
532          "ShrinkDemandedOp only supports binary operators!");
533   assert(Op.getNode()->getNumValues() == 1 &&
534          "ShrinkDemandedOp only supports nodes with one result!");
535 
536   SelectionDAG &DAG = TLO.DAG;
537   SDLoc dl(Op);
538 
539   // Early return, as this function cannot handle vector types.
540   if (Op.getValueType().isVector())
541     return false;
542 
543   // Don't do this if the node has another user, which may require the
544   // full value.
545   if (!Op.getNode()->hasOneUse())
546     return false;
547 
548   // Search for the smallest integer type with free casts to and from
549   // Op's type. For expedience, just check power-of-2 integer types.
550   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
551   unsigned DemandedSize = Demanded.getActiveBits();
552   unsigned SmallVTBits = DemandedSize;
553   if (!isPowerOf2_32(SmallVTBits))
554     SmallVTBits = NextPowerOf2(SmallVTBits);
555   for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
556     EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
557     if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
558         TLI.isZExtFree(SmallVT, Op.getValueType())) {
559       // We found a type with free casts.
560       SDValue X = DAG.getNode(
561           Op.getOpcode(), dl, SmallVT,
562           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
563           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
564       assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
565       SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
566       return TLO.CombineTo(Op, Z);
567     }
568   }
569   return false;
570 }
571 
572 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
573                                           DAGCombinerInfo &DCI) const {
574   SelectionDAG &DAG = DCI.DAG;
575   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
576                         !DCI.isBeforeLegalizeOps());
577   KnownBits Known;
578 
579   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
580   if (Simplified) {
581     DCI.AddToWorklist(Op.getNode());
582     DCI.CommitTargetLoweringOpt(TLO);
583   }
584   return Simplified;
585 }
586 
587 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
588                                           KnownBits &Known,
589                                           TargetLoweringOpt &TLO,
590                                           unsigned Depth,
591                                           bool AssumeSingleUse) const {
592   EVT VT = Op.getValueType();
593 
594   // TODO: We can probably do more work on calculating the known bits and
595   // simplifying the operations for scalable vectors, but for now we just
596   // bail out.
597   if (VT.isScalableVector()) {
598     // Pretend we don't know anything for now.
599     Known = KnownBits(DemandedBits.getBitWidth());
600     return false;
601   }
602 
603   APInt DemandedElts = VT.isVector()
604                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
605                            : APInt(1, 1);
606   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
607                               AssumeSingleUse);
608 }
609 
610 // TODO: Can we merge SelectionDAG::GetDemandedBits into this?
611 // TODO: Under what circumstances can we create nodes? Constant folding?
612 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
613     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
614     SelectionDAG &DAG, unsigned Depth) const {
615   // Limit search depth.
616   if (Depth >= SelectionDAG::MaxRecursionDepth)
617     return SDValue();
618 
619   // Ignore UNDEFs.
620   if (Op.isUndef())
621     return SDValue();
622 
623   // Not demanding any bits/elts from Op.
624   if (DemandedBits == 0 || DemandedElts == 0)
625     return DAG.getUNDEF(Op.getValueType());
626 
627   unsigned NumElts = DemandedElts.getBitWidth();
628   unsigned BitWidth = DemandedBits.getBitWidth();
629   KnownBits LHSKnown, RHSKnown;
630   switch (Op.getOpcode()) {
631   case ISD::BITCAST: {
632     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
633     EVT SrcVT = Src.getValueType();
634     EVT DstVT = Op.getValueType();
635     if (SrcVT == DstVT)
636       return Src;
637 
638     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
639     unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
640     if (NumSrcEltBits == NumDstEltBits)
641       if (SDValue V = SimplifyMultipleUseDemandedBits(
642               Src, DemandedBits, DemandedElts, DAG, Depth + 1))
643         return DAG.getBitcast(DstVT, V);
644 
645     // TODO - bigendian once we have test coverage.
646     if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
647         DAG.getDataLayout().isLittleEndian()) {
648       unsigned Scale = NumDstEltBits / NumSrcEltBits;
649       unsigned NumSrcElts = SrcVT.getVectorNumElements();
650       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
651       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
652       for (unsigned i = 0; i != Scale; ++i) {
653         unsigned Offset = i * NumSrcEltBits;
654         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
655         if (!Sub.isNullValue()) {
656           DemandedSrcBits |= Sub;
657           for (unsigned j = 0; j != NumElts; ++j)
658             if (DemandedElts[j])
659               DemandedSrcElts.setBit((j * Scale) + i);
660         }
661       }
662 
663       if (SDValue V = SimplifyMultipleUseDemandedBits(
664               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
665         return DAG.getBitcast(DstVT, V);
666     }
667 
668     // TODO - bigendian once we have test coverage.
669     if ((NumSrcEltBits % NumDstEltBits) == 0 &&
670         DAG.getDataLayout().isLittleEndian()) {
671       unsigned Scale = NumSrcEltBits / NumDstEltBits;
672       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
673       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
674       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
675       for (unsigned i = 0; i != NumElts; ++i)
676         if (DemandedElts[i]) {
677           unsigned Offset = (i % Scale) * NumDstEltBits;
678           DemandedSrcBits.insertBits(DemandedBits, Offset);
679           DemandedSrcElts.setBit(i / Scale);
680         }
681 
682       if (SDValue V = SimplifyMultipleUseDemandedBits(
683               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
684         return DAG.getBitcast(DstVT, V);
685     }
686 
687     break;
688   }
689   case ISD::AND: {
690     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
691     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
692 
693     // If all of the demanded bits are known 1 on one side, return the other.
694     // These bits cannot contribute to the result of the 'and' in this
695     // context.
696     if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
697       return Op.getOperand(0);
698     if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
699       return Op.getOperand(1);
700     break;
701   }
702   case ISD::OR: {
703     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
704     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
705 
706     // If all of the demanded bits are known zero on one side, return the
707     // other.  These bits cannot contribute to the result of the 'or' in this
708     // context.
709     if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
710       return Op.getOperand(0);
711     if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
712       return Op.getOperand(1);
713     break;
714   }
715   case ISD::XOR: {
716     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
717     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
718 
719     // If all of the demanded bits are known zero on one side, return the
720     // other.
721     if (DemandedBits.isSubsetOf(RHSKnown.Zero))
722       return Op.getOperand(0);
723     if (DemandedBits.isSubsetOf(LHSKnown.Zero))
724       return Op.getOperand(1);
725     break;
726   }
727   case ISD::SHL: {
728     // If we are only demanding sign bits then we can use the shift source
729     // directly.
730     if (const APInt *MaxSA =
731             DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
732       SDValue Op0 = Op.getOperand(0);
733       unsigned ShAmt = MaxSA->getZExtValue();
734       unsigned NumSignBits =
735           DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
736       unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
737       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
738         return Op0;
739     }
740     break;
741   }
742   case ISD::SETCC: {
743     SDValue Op0 = Op.getOperand(0);
744     SDValue Op1 = Op.getOperand(1);
745     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
746     // If (1) we only need the sign-bit, (2) the setcc operands are the same
747     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
748     // -1, we may be able to bypass the setcc.
749     if (DemandedBits.isSignMask() &&
750         Op0.getScalarValueSizeInBits() == BitWidth &&
751         getBooleanContents(Op0.getValueType()) ==
752             BooleanContent::ZeroOrNegativeOneBooleanContent) {
753       // If we're testing X < 0, then this compare isn't needed - just use X!
754       // FIXME: We're limiting to integer types here, but this should also work
755       // if we don't care about FP signed-zero. The use of SETLT with FP means
756       // that we don't care about NaNs.
757       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
758           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
759         return Op0;
760     }
761     break;
762   }
763   case ISD::SIGN_EXTEND_INREG: {
764     // If none of the extended bits are demanded, eliminate the sextinreg.
765     SDValue Op0 = Op.getOperand(0);
766     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
767     unsigned ExBits = ExVT.getScalarSizeInBits();
768     if (DemandedBits.getActiveBits() <= ExBits)
769       return Op0;
770     // If the input is already sign extended, just drop the extension.
771     unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
772     if (NumSignBits >= (BitWidth - ExBits + 1))
773       return Op0;
774     break;
775   }
776   case ISD::ANY_EXTEND_VECTOR_INREG:
777   case ISD::SIGN_EXTEND_VECTOR_INREG:
778   case ISD::ZERO_EXTEND_VECTOR_INREG: {
779     // If we only want the lowest element and none of extended bits, then we can
780     // return the bitcasted source vector.
781     SDValue Src = Op.getOperand(0);
782     EVT SrcVT = Src.getValueType();
783     EVT DstVT = Op.getValueType();
784     if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
785         DAG.getDataLayout().isLittleEndian() &&
786         DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
787       return DAG.getBitcast(DstVT, Src);
788     }
789     break;
790   }
791   case ISD::INSERT_VECTOR_ELT: {
792     // If we don't demand the inserted element, return the base vector.
793     SDValue Vec = Op.getOperand(0);
794     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
795     EVT VecVT = Vec.getValueType();
796     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
797         !DemandedElts[CIdx->getZExtValue()])
798       return Vec;
799     break;
800   }
801   case ISD::INSERT_SUBVECTOR: {
802     // If we don't demand the inserted subvector, return the base vector.
803     SDValue Vec = Op.getOperand(0);
804     SDValue Sub = Op.getOperand(1);
805     uint64_t Idx = Op.getConstantOperandVal(2);
806     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
807     if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
808       return Vec;
809     break;
810   }
811   case ISD::VECTOR_SHUFFLE: {
812     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
813 
814     // If all the demanded elts are from one operand and are inline,
815     // then we can use the operand directly.
816     bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
817     for (unsigned i = 0; i != NumElts; ++i) {
818       int M = ShuffleMask[i];
819       if (M < 0 || !DemandedElts[i])
820         continue;
821       AllUndef = false;
822       IdentityLHS &= (M == (int)i);
823       IdentityRHS &= ((M - NumElts) == i);
824     }
825 
826     if (AllUndef)
827       return DAG.getUNDEF(Op.getValueType());
828     if (IdentityLHS)
829       return Op.getOperand(0);
830     if (IdentityRHS)
831       return Op.getOperand(1);
832     break;
833   }
834   default:
835     if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
836       if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
837               Op, DemandedBits, DemandedElts, DAG, Depth))
838         return V;
839     break;
840   }
841   return SDValue();
842 }
843 
844 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
845     SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
846     unsigned Depth) const {
847   EVT VT = Op.getValueType();
848   APInt DemandedElts = VT.isVector()
849                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
850                            : APInt(1, 1);
851   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
852                                          Depth);
853 }
854 
855 SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
856     SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
857     unsigned Depth) const {
858   APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
859   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
860                                          Depth);
861 }
862 
863 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
864 /// result of Op are ever used downstream. If we can use this information to
865 /// simplify Op, create a new simplified DAG node and return true, returning the
866 /// original and new nodes in Old and New. Otherwise, analyze the expression and
867 /// return a mask of Known bits for the expression (used to simplify the
868 /// caller).  The Known bits may only be accurate for those bits in the
869 /// OriginalDemandedBits and OriginalDemandedElts.
870 bool TargetLowering::SimplifyDemandedBits(
871     SDValue Op, const APInt &OriginalDemandedBits,
872     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
873     unsigned Depth, bool AssumeSingleUse) const {
874   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
875   assert(Op.getScalarValueSizeInBits() == BitWidth &&
876          "Mask size mismatches value type size!");
877 
878   // Don't know anything.
879   Known = KnownBits(BitWidth);
880 
881   // TODO: We can probably do more work on calculating the known bits and
882   // simplifying the operations for scalable vectors, but for now we just
883   // bail out.
884   if (Op.getValueType().isScalableVector())
885     return false;
886 
887   unsigned NumElts = OriginalDemandedElts.getBitWidth();
888   assert((!Op.getValueType().isVector() ||
889           NumElts == Op.getValueType().getVectorNumElements()) &&
890          "Unexpected vector size");
891 
892   APInt DemandedBits = OriginalDemandedBits;
893   APInt DemandedElts = OriginalDemandedElts;
894   SDLoc dl(Op);
895   auto &DL = TLO.DAG.getDataLayout();
896 
897   // Undef operand.
898   if (Op.isUndef())
899     return false;
900 
901   if (Op.getOpcode() == ISD::Constant) {
902     // We know all of the bits for a constant!
903     Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
904     Known.Zero = ~Known.One;
905     return false;
906   }
907 
908   // Other users may use these bits.
909   EVT VT = Op.getValueType();
910   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
911     if (Depth != 0) {
912       // If not at the root, Just compute the Known bits to
913       // simplify things downstream.
914       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
915       return false;
916     }
917     // If this is the root being simplified, allow it to have multiple uses,
918     // just set the DemandedBits/Elts to all bits.
919     DemandedBits = APInt::getAllOnesValue(BitWidth);
920     DemandedElts = APInt::getAllOnesValue(NumElts);
921   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
922     // Not demanding any bits/elts from Op.
923     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
924   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
925     // Limit search depth.
926     return false;
927   }
928 
929   KnownBits Known2;
930   switch (Op.getOpcode()) {
931   case ISD::TargetConstant:
932     llvm_unreachable("Can't simplify this node");
933   case ISD::SCALAR_TO_VECTOR: {
934     if (!DemandedElts[0])
935       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
936 
937     KnownBits SrcKnown;
938     SDValue Src = Op.getOperand(0);
939     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
940     APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
941     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
942       return true;
943 
944     // Upper elements are undef, so only get the knownbits if we just demand
945     // the bottom element.
946     if (DemandedElts == 1)
947       Known = SrcKnown.anyextOrTrunc(BitWidth);
948     break;
949   }
950   case ISD::BUILD_VECTOR:
951     // Collect the known bits that are shared by every demanded element.
952     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
953     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
954     return false; // Don't fall through, will infinitely loop.
955   case ISD::LOAD: {
956     LoadSDNode *LD = cast<LoadSDNode>(Op);
957     if (getTargetConstantFromLoad(LD)) {
958       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
959       return false; // Don't fall through, will infinitely loop.
960     } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
961       // If this is a ZEXTLoad and we are looking at the loaded value.
962       EVT MemVT = LD->getMemoryVT();
963       unsigned MemBits = MemVT.getScalarSizeInBits();
964       Known.Zero.setBitsFrom(MemBits);
965       return false; // Don't fall through, will infinitely loop.
966     }
967     break;
968   }
969   case ISD::INSERT_VECTOR_ELT: {
970     SDValue Vec = Op.getOperand(0);
971     SDValue Scl = Op.getOperand(1);
972     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
973     EVT VecVT = Vec.getValueType();
974 
975     // If index isn't constant, assume we need all vector elements AND the
976     // inserted element.
977     APInt DemandedVecElts(DemandedElts);
978     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
979       unsigned Idx = CIdx->getZExtValue();
980       DemandedVecElts.clearBit(Idx);
981 
982       // Inserted element is not required.
983       if (!DemandedElts[Idx])
984         return TLO.CombineTo(Op, Vec);
985     }
986 
987     KnownBits KnownScl;
988     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
989     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
990     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
991       return true;
992 
993     Known = KnownScl.anyextOrTrunc(BitWidth);
994 
995     KnownBits KnownVec;
996     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
997                              Depth + 1))
998       return true;
999 
1000     if (!!DemandedVecElts) {
1001       Known.One &= KnownVec.One;
1002       Known.Zero &= KnownVec.Zero;
1003     }
1004 
1005     return false;
1006   }
1007   case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
1010     SDValue Src = Op.getOperand(0);
1011     SDValue Sub = Op.getOperand(1);
1012     uint64_t Idx = Op.getConstantOperandVal(2);
1013     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1014     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1015     APInt DemandedSrcElts = DemandedElts;
1016     DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
1017 
1018     KnownBits KnownSub, KnownSrc;
1019     if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1020                              Depth + 1))
1021       return true;
1022     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1023                              Depth + 1))
1024       return true;
1025 
1026     Known.Zero.setAllBits();
1027     Known.One.setAllBits();
1028     if (!!DemandedSubElts) {
1029       Known.One &= KnownSub.One;
1030       Known.Zero &= KnownSub.Zero;
1031     }
1032     if (!!DemandedSrcElts) {
1033       Known.One &= KnownSrc.One;
1034       Known.Zero &= KnownSrc.Zero;
1035     }
1036 
1037     // Attempt to avoid multi-use src if we don't need anything from it.
1038     if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
1039         !DemandedSrcElts.isAllOnesValue()) {
1040       SDValue NewSub = SimplifyMultipleUseDemandedBits(
1041           Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1042       SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1043           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1044       if (NewSub || NewSrc) {
1045         NewSub = NewSub ? NewSub : Sub;
1046         NewSrc = NewSrc ? NewSrc : Src;
1047         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1048                                         Op.getOperand(2));
1049         return TLO.CombineTo(Op, NewOp);
1050       }
1051     }
1052     break;
1053   }
1054   case ISD::EXTRACT_SUBVECTOR: {
1055     // Offset the demanded elts by the subvector index.
1056     SDValue Src = Op.getOperand(0);
1057     uint64_t Idx = Op.getConstantOperandVal(1);
1058     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1059     APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
1060 
1061     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1062                              Depth + 1))
1063       return true;
1064 
1065     // Attempt to avoid multi-use src if we don't need anything from it.
1066     if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
1067       SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1068           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1069       if (DemandedSrc) {
1070         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1071                                         Op.getOperand(1));
1072         return TLO.CombineTo(Op, NewOp);
1073       }
1074     }
1075     break;
1076   }
1077   case ISD::CONCAT_VECTORS: {
1078     Known.Zero.setAllBits();
1079     Known.One.setAllBits();
1080     EVT SubVT = Op.getOperand(0).getValueType();
1081     unsigned NumSubVecs = Op.getNumOperands();
1082     unsigned NumSubElts = SubVT.getVectorNumElements();
1083     for (unsigned i = 0; i != NumSubVecs; ++i) {
1084       APInt DemandedSubElts =
1085           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1086       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1087                                Known2, TLO, Depth + 1))
1088         return true;
1089       // Known bits are shared by every demanded subvector element.
1090       if (!!DemandedSubElts) {
1091         Known.One &= Known2.One;
1092         Known.Zero &= Known2.Zero;
1093       }
1094     }
1095     break;
1096   }
1097   case ISD::VECTOR_SHUFFLE: {
1098     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1099 
    // Collect demanded elements from shuffle operands.
1101     APInt DemandedLHS(NumElts, 0);
1102     APInt DemandedRHS(NumElts, 0);
1103     for (unsigned i = 0; i != NumElts; ++i) {
1104       if (!DemandedElts[i])
1105         continue;
1106       int M = ShuffleMask[i];
1107       if (M < 0) {
1108         // For UNDEF elements, we don't know anything about the common state of
1109         // the shuffle result.
1110         DemandedLHS.clearAllBits();
1111         DemandedRHS.clearAllBits();
1112         break;
1113       }
1114       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
1115       if (M < (int)NumElts)
1116         DemandedLHS.setBit(M);
1117       else
1118         DemandedRHS.setBit(M - NumElts);
1119     }
1120 
1121     if (!!DemandedLHS || !!DemandedRHS) {
1122       SDValue Op0 = Op.getOperand(0);
1123       SDValue Op1 = Op.getOperand(1);
1124 
1125       Known.Zero.setAllBits();
1126       Known.One.setAllBits();
1127       if (!!DemandedLHS) {
1128         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1129                                  Depth + 1))
1130           return true;
1131         Known.One &= Known2.One;
1132         Known.Zero &= Known2.Zero;
1133       }
1134       if (!!DemandedRHS) {
1135         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1136                                  Depth + 1))
1137           return true;
1138         Known.One &= Known2.One;
1139         Known.Zero &= Known2.Zero;
1140       }
1141 
1142       // Attempt to avoid multi-use ops if we don't need anything from them.
1143       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1144           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1145       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1146           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1147       if (DemandedOp0 || DemandedOp1) {
1148         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1149         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1150         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1151         return TLO.CombineTo(Op, NewOp);
1152       }
1153     }
1154     break;
1155   }
1156   case ISD::AND: {
1157     SDValue Op0 = Op.getOperand(0);
1158     SDValue Op1 = Op.getOperand(1);
1159 
1160     // If the RHS is a constant, check to see if the LHS would be zero without
1161     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1162     // simplify the LHS, here we're using information from the LHS to simplify
1163     // the RHS.
1164     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1165       // Do not increment Depth here; that can cause an infinite loop.
1166       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1167       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1168       if ((LHSKnown.Zero & DemandedBits) ==
1169           (~RHSC->getAPIntValue() & DemandedBits))
1170         return TLO.CombineTo(Op, Op0);
1171 
1172       // If any of the set bits in the RHS are known zero on the LHS, shrink
1173       // the constant.
1174       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
1175         return true;
1176 
1177       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1178       // constant, but if this 'and' is only clearing bits that were just set by
1179       // the xor, then this 'and' can be eliminated by shrinking the mask of
1180       // the xor. For example, for a 32-bit X:
1181       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1182       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1183           LHSKnown.One == ~RHSC->getAPIntValue()) {
1184         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1185         return TLO.CombineTo(Op, Xor);
1186       }
1187     }
1188 
1189     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1190                              Depth + 1))
1191       return true;
1192     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1193     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1194                              Known2, TLO, Depth + 1))
1195       return true;
1196     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1197 
1198     // Attempt to avoid multi-use ops if we don't need anything from them.
1199     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1200       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1201           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1202       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1203           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1204       if (DemandedOp0 || DemandedOp1) {
1205         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1206         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1207         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1208         return TLO.CombineTo(Op, NewOp);
1209       }
1210     }
1211 
1212     // If all of the demanded bits are known one on one side, return the other.
1213     // These bits cannot contribute to the result of the 'and'.
1214     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1215       return TLO.CombineTo(Op, Op0);
1216     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1217       return TLO.CombineTo(Op, Op1);
1218     // If all of the demanded bits in the inputs are known zeros, return zero.
1219     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1220       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1221     // If the RHS is a constant, see if we can simplify it.
1222     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
1223       return true;
1224     // If the operation can be done in a smaller type, do so.
1225     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1226       return true;
1227 
1228     Known &= Known2;
1229     break;
1230   }
1231   case ISD::OR: {
1232     SDValue Op0 = Op.getOperand(0);
1233     SDValue Op1 = Op.getOperand(1);
1234 
1235     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1236                              Depth + 1))
1237       return true;
1238     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1239     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1240                              Known2, TLO, Depth + 1))
1241       return true;
1242     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1243 
1244     // Attempt to avoid multi-use ops if we don't need anything from them.
1245     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1246       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1247           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1248       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1249           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1250       if (DemandedOp0 || DemandedOp1) {
1251         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1252         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1253         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1254         return TLO.CombineTo(Op, NewOp);
1255       }
1256     }
1257 
1258     // If all of the demanded bits are known zero on one side, return the other.
1259     // These bits cannot contribute to the result of the 'or'.
1260     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1261       return TLO.CombineTo(Op, Op0);
1262     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1263       return TLO.CombineTo(Op, Op1);
1264     // If the RHS is a constant, see if we can simplify it.
1265     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1266       return true;
1267     // If the operation can be done in a smaller type, do so.
1268     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1269       return true;
1270 
1271     Known |= Known2;
1272     break;
1273   }
1274   case ISD::XOR: {
1275     SDValue Op0 = Op.getOperand(0);
1276     SDValue Op1 = Op.getOperand(1);
1277 
1278     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1279                              Depth + 1))
1280       return true;
1281     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1282     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1283                              Depth + 1))
1284       return true;
1285     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1286 
1287     // Attempt to avoid multi-use ops if we don't need anything from them.
1288     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1289       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1290           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1291       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1292           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1293       if (DemandedOp0 || DemandedOp1) {
1294         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1295         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1296         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1297         return TLO.CombineTo(Op, NewOp);
1298       }
1299     }
1300 
1301     // If all of the demanded bits are known zero on one side, return the other.
1302     // These bits cannot contribute to the result of the 'xor'.
1303     if (DemandedBits.isSubsetOf(Known.Zero))
1304       return TLO.CombineTo(Op, Op0);
1305     if (DemandedBits.isSubsetOf(Known2.Zero))
1306       return TLO.CombineTo(Op, Op1);
1307     // If the operation can be done in a smaller type, do so.
1308     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1309       return true;
1310 
1311     // If all of the unknown bits are known to be zero on one side or the other
1312     // (but not both) turn this into an *inclusive* or.
1313     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1314     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1315       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1316 
1317     if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
1318       // If one side is a constant, and all of the known set bits on the other
1319       // side are also set in the constant, turn this into an AND, as we know
1320       // the bits will be cleared.
1321       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1322       // NB: it is okay if more bits are known than are requested
1323       if (C->getAPIntValue() == Known2.One) {
1324         SDValue ANDC =
1325             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1326         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1327       }
1328 
1329       // If the RHS is a constant, see if we can change it. Don't alter a -1
1330       // constant because that's a 'not' op, and that is better for combining
1331       // and codegen.
1332       if (!C->isAllOnesValue()) {
1333         if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
1334           // We're flipping all demanded bits. Flip the undemanded bits too.
1335           SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1336           return TLO.CombineTo(Op, New);
1337         }
1338         // If we can't turn this into a 'not', try to shrink the constant.
1339         if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1340           return true;
1341       }
1342     }
1343 
1344     Known ^= Known2;
1345     break;
1346   }
1347   case ISD::SELECT:
1348     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1349                              Depth + 1))
1350       return true;
1351     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1352                              Depth + 1))
1353       return true;
1354     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1355     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1356 
1357     // If the operands are constants, see if we can simplify them.
1358     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1359       return true;
1360 
1361     // Only known if known in both the LHS and RHS.
1362     Known.One &= Known2.One;
1363     Known.Zero &= Known2.Zero;
1364     break;
1365   case ISD::SELECT_CC:
1366     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1367                              Depth + 1))
1368       return true;
1369     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1370                              Depth + 1))
1371       return true;
1372     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1373     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1374 
1375     // If the operands are constants, see if we can simplify them.
1376     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1377       return true;
1378 
1379     // Only known if known in both the LHS and RHS.
1380     Known.One &= Known2.One;
1381     Known.Zero &= Known2.Zero;
1382     break;
1383   case ISD::SETCC: {
1384     SDValue Op0 = Op.getOperand(0);
1385     SDValue Op1 = Op.getOperand(1);
1386     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1387     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1388     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1389     // -1, we may be able to bypass the setcc.
1390     if (DemandedBits.isSignMask() &&
1391         Op0.getScalarValueSizeInBits() == BitWidth &&
1392         getBooleanContents(Op0.getValueType()) ==
1393             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1394       // If we're testing X < 0, then this compare isn't needed - just use X!
1395       // FIXME: We're limiting to integer types here, but this should also work
1396       // if we don't care about FP signed-zero. The use of SETLT with FP means
1397       // that we don't care about NaNs.
1398       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1399           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1400         return TLO.CombineTo(Op, Op0);
1401 
1402       // TODO: Should we check for other forms of sign-bit comparisons?
1403       // Examples: X <= -1, X >= 0
1404     }
1405     if (getBooleanContents(Op0.getValueType()) ==
1406             TargetLowering::ZeroOrOneBooleanContent &&
1407         BitWidth > 1)
1408       Known.Zero.setBitsFrom(1);
1409     break;
1410   }
1411   case ISD::SHL: {
1412     SDValue Op0 = Op.getOperand(0);
1413     SDValue Op1 = Op.getOperand(1);
1414     EVT ShiftVT = Op1.getValueType();
1415 
1416     if (const APInt *SA =
1417             TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1418       unsigned ShAmt = SA->getZExtValue();
1419       if (ShAmt == 0)
1420         return TLO.CombineTo(Op, Op0);
1421 
1422       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1423       // single shift.  We can do this if the bottom bits (which are shifted
1424       // out) are never demanded.
1425       // TODO - support non-uniform vector amounts.
1426       if (Op0.getOpcode() == ISD::SRL) {
1427         if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1428           if (const APInt *SA2 =
1429                   TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1430             unsigned C1 = SA2->getZExtValue();
1431             unsigned Opc = ISD::SHL;
1432             int Diff = ShAmt - C1;
1433             if (Diff < 0) {
1434               Diff = -Diff;
1435               Opc = ISD::SRL;
1436             }
1437             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1438             return TLO.CombineTo(
1439                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1440           }
1441         }
1442       }
1443 
1444       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1445       // are not demanded. This will likely allow the anyext to be folded away.
1446       // TODO - support non-uniform vector amounts.
1447       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1448         SDValue InnerOp = Op0.getOperand(0);
1449         EVT InnerVT = InnerOp.getValueType();
1450         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1451         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1452             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1453           EVT ShTy = getShiftAmountTy(InnerVT, DL);
1454           if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1455             ShTy = InnerVT;
1456           SDValue NarrowShl =
1457               TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1458                               TLO.DAG.getConstant(ShAmt, dl, ShTy));
1459           return TLO.CombineTo(
1460               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1461         }
1462 
1463         // Repeat the SHL optimization above in cases where an extension
1464         // intervenes: (shl (anyext (shr x, c1)), c2) to
1465         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1466         // aren't demanded (as above) and that the shifted upper c1 bits of
1467         // x aren't demanded.
1468         // TODO - support non-uniform vector amounts.
1469         if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
1470             InnerOp.hasOneUse()) {
1471           if (const APInt *SA2 =
1472                   TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1473             unsigned InnerShAmt = SA2->getZExtValue();
1474             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1475                 DemandedBits.getActiveBits() <=
1476                     (InnerBits - InnerShAmt + ShAmt) &&
1477                 DemandedBits.countTrailingZeros() >= ShAmt) {
1478               SDValue NewSA =
1479                   TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1480               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1481                                                InnerOp.getOperand(0));
1482               return TLO.CombineTo(
1483                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1484             }
1485           }
1486         }
1487       }
1488 
1489       APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1490       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1491                                Depth + 1))
1492         return true;
1493       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1494       Known.Zero <<= ShAmt;
1495       Known.One <<= ShAmt;
1496       // low bits known zero.
1497       Known.Zero.setLowBits(ShAmt);
1498 
1499       // Try shrinking the operation as long as the shift amount will still be
1500       // in range.
1501       if ((ShAmt < DemandedBits.getActiveBits()) &&
1502           ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1503         return true;
1504     }
1505 
1506     // If we are only demanding sign bits then we can use the shift source
1507     // directly.
1508     if (const APInt *MaxSA =
1509             TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1510       unsigned ShAmt = MaxSA->getZExtValue();
1511       unsigned NumSignBits =
1512           TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1513       unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1514       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1515         return TLO.CombineTo(Op, Op0);
1516     }
1517     break;
1518   }
1519   case ISD::SRL: {
1520     SDValue Op0 = Op.getOperand(0);
1521     SDValue Op1 = Op.getOperand(1);
1522     EVT ShiftVT = Op1.getValueType();
1523 
1524     if (const APInt *SA =
1525             TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1526       unsigned ShAmt = SA->getZExtValue();
1527       if (ShAmt == 0)
1528         return TLO.CombineTo(Op, Op0);
1529 
1530       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1531       // single shift.  We can do this if the top bits (which are shifted out)
1532       // are never demanded.
1533       // TODO - support non-uniform vector amounts.
1534       if (Op0.getOpcode() == ISD::SHL) {
1535         if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1536           if (const APInt *SA2 =
1537                   TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1538             unsigned C1 = SA2->getZExtValue();
1539             unsigned Opc = ISD::SRL;
1540             int Diff = ShAmt - C1;
1541             if (Diff < 0) {
1542               Diff = -Diff;
1543               Opc = ISD::SHL;
1544             }
1545             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1546             return TLO.CombineTo(
1547                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1548           }
1549         }
1550       }
1551 
1552       APInt InDemandedMask = (DemandedBits << ShAmt);
1553 
1554       // If the shift is exact, then it does demand the low bits (and knows that
1555       // they are zero).
1556       if (Op->getFlags().hasExact())
1557         InDemandedMask.setLowBits(ShAmt);
1558 
1559       // Compute the new bits that are at the top now.
1560       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1561                                Depth + 1))
1562         return true;
1563       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1564       Known.Zero.lshrInPlace(ShAmt);
1565       Known.One.lshrInPlace(ShAmt);
1566       // High bits known zero.
1567       Known.Zero.setHighBits(ShAmt);
1568     }
1569     break;
1570   }
1571   case ISD::SRA: {
1572     SDValue Op0 = Op.getOperand(0);
1573     SDValue Op1 = Op.getOperand(1);
1574     EVT ShiftVT = Op1.getValueType();
1575 
1576     // If we only want bits that already match the signbit then we don't need
1577     // to shift.
1578     unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1579     if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1580         NumHiDemandedBits)
1581       return TLO.CombineTo(Op, Op0);
1582 
1583     // If this is an arithmetic shift right and only the low-bit is set, we can
1584     // always convert this into a logical shr, even if the shift amount is
1585     // variable.  The low bit of the shift cannot be an input sign bit unless
1586     // the shift amount is >= the size of the datatype, which is undefined.
1587     if (DemandedBits.isOneValue())
1588       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1589 
1590     if (const APInt *SA =
1591             TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1592       unsigned ShAmt = SA->getZExtValue();
1593       if (ShAmt == 0)
1594         return TLO.CombineTo(Op, Op0);
1595 
1596       APInt InDemandedMask = (DemandedBits << ShAmt);
1597 
1598       // If the shift is exact, then it does demand the low bits (and knows that
1599       // they are zero).
1600       if (Op->getFlags().hasExact())
1601         InDemandedMask.setLowBits(ShAmt);
1602 
1603       // If any of the demanded bits are produced by the sign extension, we also
1604       // demand the input sign bit.
1605       if (DemandedBits.countLeadingZeros() < ShAmt)
1606         InDemandedMask.setSignBit();
1607 
1608       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1609                                Depth + 1))
1610         return true;
1611       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1612       Known.Zero.lshrInPlace(ShAmt);
1613       Known.One.lshrInPlace(ShAmt);
1614 
1615       // If the input sign bit is known to be zero, or if none of the top bits
1616       // are demanded, turn this into an unsigned shift right.
1617       if (Known.Zero[BitWidth - ShAmt - 1] ||
1618           DemandedBits.countLeadingZeros() >= ShAmt) {
1619         SDNodeFlags Flags;
1620         Flags.setExact(Op->getFlags().hasExact());
1621         return TLO.CombineTo(
1622             Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1623       }
1624 
1625       int Log2 = DemandedBits.exactLogBase2();
1626       if (Log2 >= 0) {
1627         // The bit must come from the sign.
1628         SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
1629         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1630       }
1631 
1632       if (Known.One[BitWidth - ShAmt - 1])
1633         // New bits are known one.
1634         Known.One.setHighBits(ShAmt);
1635 
1636       // Attempt to avoid multi-use ops if we don't need anything from them.
1637       if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1638         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1639             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1640         if (DemandedOp0) {
1641           SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
1642           return TLO.CombineTo(Op, NewOp);
1643         }
1644       }
1645     }
1646     break;
1647   }
1648   case ISD::FSHL:
1649   case ISD::FSHR: {
1650     SDValue Op0 = Op.getOperand(0);
1651     SDValue Op1 = Op.getOperand(1);
1652     SDValue Op2 = Op.getOperand(2);
1653     bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1654 
1655     if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1656       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1657 
1658       // For fshl, 0-shift returns the 1st arg.
1659       // For fshr, 0-shift returns the 2nd arg.
1660       if (Amt == 0) {
1661         if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1662                                  Known, TLO, Depth + 1))
1663           return true;
1664         break;
1665       }
1666 
1667       // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1668       // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
1669       APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1670       APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1671       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1672                                Depth + 1))
1673         return true;
1674       if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1675                                Depth + 1))
1676         return true;
1677 
1678       Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
1679       Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
1680       Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1681       Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1682       Known.One |= Known2.One;
1683       Known.Zero |= Known2.Zero;
1684     }
1685 
1686     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
1687     if (isPowerOf2_32(BitWidth)) {
1688       APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
1689       if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
1690                                Known2, TLO, Depth + 1))
1691         return true;
1692     }
1693     break;
1694   }
1695   case ISD::ROTL:
1696   case ISD::ROTR: {
1697     SDValue Op0 = Op.getOperand(0);
1698     SDValue Op1 = Op.getOperand(1);
1699 
1700     // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
1701     if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
1702       return TLO.CombineTo(Op, Op0);
1703 
1704     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
1705     if (isPowerOf2_32(BitWidth)) {
1706       APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
1707       if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
1708                                Depth + 1))
1709         return true;
1710     }
1711     break;
1712   }
1713   case ISD::BITREVERSE: {
1714     SDValue Src = Op.getOperand(0);
1715     APInt DemandedSrcBits = DemandedBits.reverseBits();
1716     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1717                              Depth + 1))
1718       return true;
1719     Known.One = Known2.One.reverseBits();
1720     Known.Zero = Known2.Zero.reverseBits();
1721     break;
1722   }
1723   case ISD::BSWAP: {
1724     SDValue Src = Op.getOperand(0);
1725     APInt DemandedSrcBits = DemandedBits.byteSwap();
1726     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1727                              Depth + 1))
1728       return true;
1729     Known.One = Known2.One.byteSwap();
1730     Known.Zero = Known2.Zero.byteSwap();
1731     break;
1732   }
1733   case ISD::SIGN_EXTEND_INREG: {
1734     SDValue Op0 = Op.getOperand(0);
1735     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1736     unsigned ExVTBits = ExVT.getScalarSizeInBits();
1737 
1738     // If we only care about the highest bit, don't bother shifting right.
1739     if (DemandedBits.isSignMask()) {
1740       unsigned NumSignBits =
1741           TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1742       bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
1743       // However if the input is already sign extended we expect the sign
1744       // extension to be dropped altogether later and do not simplify.
1745       if (!AlreadySignExtended) {
1746         // Compute the correct shift amount type, which must be getShiftAmountTy
1747         // for scalar types after legalization.
1748         EVT ShiftAmtTy = VT;
1749         if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
1750           ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
1751 
1752         SDValue ShiftAmt =
1753             TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
1754         return TLO.CombineTo(Op,
1755                              TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
1756       }
1757     }
1758 
1759     // If none of the extended bits are demanded, eliminate the sextinreg.
1760     if (DemandedBits.getActiveBits() <= ExVTBits)
1761       return TLO.CombineTo(Op, Op0);
1762 
1763     APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
1764 
1765     // Since the sign extended bits are demanded, we know that the sign
1766     // bit is demanded.
1767     InputDemandedBits.setBit(ExVTBits - 1);
1768 
1769     if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
1770       return true;
1771     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1772 
1773     // If the sign bit of the input is known set or clear, then we know the
1774     // top bits of the result.
1775 
1776     // If the input sign bit is known zero, convert this into a zero extension.
1777     if (Known.Zero[ExVTBits - 1])
1778       return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
1779 
1780     APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
1781     if (Known.One[ExVTBits - 1]) { // Input sign bit known set
1782       Known.One.setBitsFrom(ExVTBits);
1783       Known.Zero &= Mask;
1784     } else { // Input sign bit unknown
1785       Known.Zero &= Mask;
1786       Known.One &= Mask;
1787     }
1788     break;
1789   }
1790   case ISD::BUILD_PAIR: {
1791     EVT HalfVT = Op.getOperand(0).getValueType();
1792     unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
1793 
1794     APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
1795     APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
1796 
1797     KnownBits KnownLo, KnownHi;
1798 
1799     if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
1800       return true;
1801 
1802     if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
1803       return true;
1804 
1805     Known.Zero = KnownLo.Zero.zext(BitWidth) |
1806                  KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
1807 
1808     Known.One = KnownLo.One.zext(BitWidth) |
1809                 KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
1810     break;
1811   }
1812   case ISD::ZERO_EXTEND:
1813   case ISD::ZERO_EXTEND_VECTOR_INREG: {
1814     SDValue Src = Op.getOperand(0);
1815     EVT SrcVT = Src.getValueType();
1816     unsigned InBits = SrcVT.getScalarSizeInBits();
1817     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1818     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
1819 
1820     // If none of the top bits are demanded, convert this into an any_extend.
1821     if (DemandedBits.getActiveBits() <= InBits) {
1822       // If we only need the non-extended bits of the bottom element
1823       // then we can just bitcast to the result.
1824       if (IsVecInReg && DemandedElts == 1 &&
1825           VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1826           TLO.DAG.getDataLayout().isLittleEndian())
1827         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1828 
1829       unsigned Opc =
1830           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1831       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1832         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1833     }
1834 
1835     APInt InDemandedBits = DemandedBits.trunc(InBits);
1836     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1837     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1838                              Depth + 1))
1839       return true;
1840     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1841     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1842     Known = Known.zext(BitWidth);
1843     break;
1844   }
1845   case ISD::SIGN_EXTEND:
1846   case ISD::SIGN_EXTEND_VECTOR_INREG: {
1847     SDValue Src = Op.getOperand(0);
1848     EVT SrcVT = Src.getValueType();
1849     unsigned InBits = SrcVT.getScalarSizeInBits();
1850     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1851     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
1852 
1853     // If none of the top bits are demanded, convert this into an any_extend.
1854     if (DemandedBits.getActiveBits() <= InBits) {
1855       // If we only need the non-extended bits of the bottom element
1856       // then we can just bitcast to the result.
1857       if (IsVecInReg && DemandedElts == 1 &&
1858           VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1859           TLO.DAG.getDataLayout().isLittleEndian())
1860         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1861 
1862       unsigned Opc =
1863           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1864       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1865         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1866     }
1867 
1868     APInt InDemandedBits = DemandedBits.trunc(InBits);
1869     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1870 
1871     // Since some of the sign extended bits are demanded, we know that the sign
1872     // bit is demanded.
1873     InDemandedBits.setBit(InBits - 1);
1874 
1875     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1876                              Depth + 1))
1877       return true;
1878     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1879     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1880 
1881     // If the sign bit is known one, the top bits match.
1882     Known = Known.sext(BitWidth);
1883 
1884     // If the sign bit is known zero, convert this to a zero extend.
1885     if (Known.isNonNegative()) {
1886       unsigned Opc =
1887           IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
1888       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1889         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1890     }
1891     break;
1892   }
1893   case ISD::ANY_EXTEND:
1894   case ISD::ANY_EXTEND_VECTOR_INREG: {
1895     SDValue Src = Op.getOperand(0);
1896     EVT SrcVT = Src.getValueType();
1897     unsigned InBits = SrcVT.getScalarSizeInBits();
1898     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1899     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
1900 
1901     // If we only need the bottom element then we can just bitcast.
1902     // TODO: Handle ANY_EXTEND?
1903     if (IsVecInReg && DemandedElts == 1 &&
1904         VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1905         TLO.DAG.getDataLayout().isLittleEndian())
1906       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1907 
1908     APInt InDemandedBits = DemandedBits.trunc(InBits);
1909     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1910     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1911                              Depth + 1))
1912       return true;
1913     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1914     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1915     Known = Known.anyext(BitWidth);
1916 
1917     // Attempt to avoid multi-use ops if we don't need anything from them.
1918     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1919             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
1920       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
1921     break;
1922   }
1923   case ISD::TRUNCATE: {
1924     SDValue Src = Op.getOperand(0);
1925 
1926     // Simplify the input, using demanded bit information, and compute the known
1927     // zero/one bits live out.
1928     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
1929     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
1930     if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
1931       return true;
1932     Known = Known.trunc(BitWidth);
1933 
1934     // Attempt to avoid multi-use ops if we don't need anything from them.
1935     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1936             Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
1937       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
1938 
1939     // If the input is only used by this truncate, see if we can shrink it based
1940     // on the known demanded bits.
1941     if (Src.getNode()->hasOneUse()) {
1942       switch (Src.getOpcode()) {
1943       default:
1944         break;
1945       case ISD::SRL:
1946         // Shrink SRL by a constant if none of the high bits shifted in are
1947         // demanded.
1948         if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
1949           // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
1950           // undesirable.
1951           break;
1952 
1953         SDValue ShAmt = Src.getOperand(1);
1954         auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
1955         if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
1956           break;
1957         uint64_t ShVal = ShAmtC->getZExtValue();
1958 
1959         APInt HighBits =
1960             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
1961         HighBits.lshrInPlace(ShVal);
1962         HighBits = HighBits.trunc(BitWidth);
1963 
1964         if (!(HighBits & DemandedBits)) {
1965           // None of the shifted in bits are needed.  Add a truncate of the
1966           // shift input, then shift it.
1967           if (TLO.LegalTypes())
1968             ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
1969           SDValue NewTrunc =
1970               TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
1971           return TLO.CombineTo(
1972               Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt));
1973         }
1974         break;
1975       }
1976     }
1977 
1978     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1979     break;
1980   }
1981   case ISD::AssertZext: {
1982     // AssertZext demands all of the high bits, plus any of the low bits
1983     // demanded by its users.
1984     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1985     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
1986     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
1987                              TLO, Depth + 1))
1988       return true;
1989     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1990 
1991     Known.Zero |= ~InMask;
1992     break;
1993   }
1994   case ISD::EXTRACT_VECTOR_ELT: {
1995     SDValue Src = Op.getOperand(0);
1996     SDValue Idx = Op.getOperand(1);
1997     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1998     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
1999 
2000     // Demand the bits from every vector element without a constant index.
2001     APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
2002     if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2003       if (CIdx->getAPIntValue().ult(NumSrcElts))
2004         DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2005 
2006     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2007     // anything about the extended bits.
2008     APInt DemandedSrcBits = DemandedBits;
2009     if (BitWidth > EltBitWidth)
2010       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2011 
2012     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2013                              Depth + 1))
2014       return true;
2015 
2016     // Attempt to avoid multi-use ops if we don't need anything from them.
2017     if (!DemandedSrcBits.isAllOnesValue() ||
2018         !DemandedSrcElts.isAllOnesValue()) {
2019       if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2020               Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2021         SDValue NewOp =
2022             TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2023         return TLO.CombineTo(Op, NewOp);
2024       }
2025     }
2026 
2027     Known = Known2;
2028     if (BitWidth > EltBitWidth)
2029       Known = Known.anyext(BitWidth);
2030     break;
2031   }
2032   case ISD::BITCAST: {
2033     SDValue Src = Op.getOperand(0);
2034     EVT SrcVT = Src.getValueType();
2035     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2036 
2037     // If this is an FP->Int bitcast and if the sign bit is the only
2038     // thing demanded, turn this into a FGETSIGN.
2039     if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2040         DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2041         SrcVT.isFloatingPoint()) {
2042       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2043       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2044       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2045           SrcVT != MVT::f128) {
2046         // Cannot eliminate/lower SHL for f128 yet.
2047         EVT Ty = OpVTLegal ? VT : MVT::i32;
2048         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2049         // place.  We expect the SHL to be eliminated by other optimizations.
2050         SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2051         unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2052         if (!OpVTLegal && OpVTSizeInBits > 32)
2053           Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2054         unsigned ShVal = Op.getValueSizeInBits() - 1;
2055         SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2056         return TLO.CombineTo(Op,
2057                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2058       }
2059     }
2060 
2061     // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2062     // Demand the elt/bit if any of the original elts/bits are demanded.
2063     // TODO - bigendian once we have test coverage.
2064     if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
2065         TLO.DAG.getDataLayout().isLittleEndian()) {
2066       unsigned Scale = BitWidth / NumSrcEltBits;
2067       unsigned NumSrcElts = SrcVT.getVectorNumElements();
2068       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
2069       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
2070       for (unsigned i = 0; i != Scale; ++i) {
2071         unsigned Offset = i * NumSrcEltBits;
2072         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
2073         if (!Sub.isNullValue()) {
2074           DemandedSrcBits |= Sub;
2075           for (unsigned j = 0; j != NumElts; ++j)
2076             if (DemandedElts[j])
2077               DemandedSrcElts.setBit((j * Scale) + i);
2078         }
2079       }
2080 
2081       APInt KnownSrcUndef, KnownSrcZero;
2082       if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2083                                      KnownSrcZero, TLO, Depth + 1))
2084         return true;
2085 
2086       KnownBits KnownSrcBits;
2087       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2088                                KnownSrcBits, TLO, Depth + 1))
2089         return true;
2090     } else if ((NumSrcEltBits % BitWidth) == 0 &&
2091                TLO.DAG.getDataLayout().isLittleEndian()) {
2092       unsigned Scale = NumSrcEltBits / BitWidth;
2093       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2094       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
2095       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
2096       for (unsigned i = 0; i != NumElts; ++i)
2097         if (DemandedElts[i]) {
2098           unsigned Offset = (i % Scale) * BitWidth;
2099           DemandedSrcBits.insertBits(DemandedBits, Offset);
2100           DemandedSrcElts.setBit(i / Scale);
2101         }
2102 
2103       if (SrcVT.isVector()) {
2104         APInt KnownSrcUndef, KnownSrcZero;
2105         if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2106                                        KnownSrcZero, TLO, Depth + 1))
2107           return true;
2108       }
2109 
2110       KnownBits KnownSrcBits;
2111       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2112                                KnownSrcBits, TLO, Depth + 1))
2113         return true;
2114     }
2115 
2116     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
2117     // recursive call where Known may be useful to the caller.
2118     if (Depth > 0) {
2119       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2120       return false;
2121     }
2122     break;
2123   }
2124   case ISD::ADD:
2125   case ISD::MUL:
2126   case ISD::SUB: {
2127     // Add, Sub, and Mul don't demand any bits in positions beyond that
2128     // of the highest bit demanded of them.
2129     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2130     SDNodeFlags Flags = Op.getNode()->getFlags();
2131     unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
2132     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2133     if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
2134                              Depth + 1) ||
2135         SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
2136                              Depth + 1) ||
2137         // See if the operation should be performed at a smaller bit width.
2138         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2139       if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2140         // Disable the nsw and nuw flags. We can no longer guarantee that we
2141         // won't wrap after simplification.
2142         Flags.setNoSignedWrap(false);
2143         Flags.setNoUnsignedWrap(false);
2144         SDValue NewOp =
2145             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2146         return TLO.CombineTo(Op, NewOp);
2147       }
2148       return true;
2149     }
2150 
2151     // Attempt to avoid multi-use ops if we don't need anything from them.
2152     if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
2153       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2154           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2155       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2156           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2157       if (DemandedOp0 || DemandedOp1) {
2158         Flags.setNoSignedWrap(false);
2159         Flags.setNoUnsignedWrap(false);
2160         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2161         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2162         SDValue NewOp =
2163             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2164         return TLO.CombineTo(Op, NewOp);
2165       }
2166     }
2167 
2168     // If we have a constant operand, we may be able to turn it into -1 if we
2169     // do not demand the high bits. This can make the constant smaller to
2170     // encode, allow more general folding, or match specialized instruction
2171     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2172     // is probably not useful (and could be detrimental).
2173     ConstantSDNode *C = isConstOrConstSplat(Op1);
2174     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2175     if (C && !C->isAllOnesValue() && !C->isOne() &&
2176         (C->getAPIntValue() | HighMask).isAllOnesValue()) {
2177       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2178       // Disable the nsw and nuw flags. We can no longer guarantee that we
2179       // won't wrap after simplification.
2180       Flags.setNoSignedWrap(false);
2181       Flags.setNoUnsignedWrap(false);
2182       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2183       return TLO.CombineTo(Op, NewOp);
2184     }
2185 
2186     LLVM_FALLTHROUGH;
2187   }
2188   default:
2189     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2190       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2191                                             Known, TLO, Depth))
2192         return true;
2193       break;
2194     }
2195 
2196     // Just use computeKnownBits to compute output bits.
2197     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2198     break;
2199   }
2200 
2201   // If we know the value of all of the demanded bits, return this as a
2202   // constant.
2203   if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2204     // Avoid folding to a constant if any OpaqueConstant is involved.
2205     const SDNode *N = Op.getNode();
2206     for (SDNodeIterator I = SDNodeIterator::begin(N),
2207                         E = SDNodeIterator::end(N);
2208          I != E; ++I) {
2209       SDNode *Op = *I;
2210       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2211         if (C->isOpaque())
2212           return false;
2213     }
2214     // TODO: Handle float bits as well.
2215     if (VT.isInteger())
2216       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2217   }
2218 
2219   return false;
2220 }
2221 
2222 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2223                                                 const APInt &DemandedElts,
2224                                                 APInt &KnownUndef,
2225                                                 APInt &KnownZero,
2226                                                 DAGCombinerInfo &DCI) const {
2227   SelectionDAG &DAG = DCI.DAG;
2228   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2229                         !DCI.isBeforeLegalizeOps());
2230 
2231   bool Simplified =
2232       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2233   if (Simplified) {
2234     DCI.AddToWorklist(Op.getNode());
2235     DCI.CommitTargetLoweringOpt(TLO);
2236   }
2237 
2238   return Simplified;
2239 }
2240 
2241 /// Given a vector binary operation and known undefined elements for each input
2242 /// operand, compute whether each element of the output is undefined.
2243 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2244                                          const APInt &UndefOp0,
2245                                          const APInt &UndefOp1) {
2246   EVT VT = BO.getValueType();
2247   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2248          "Vector binop only");
2249 
2250   EVT EltVT = VT.getVectorElementType();
2251   unsigned NumElts = VT.getVectorNumElements();
2252   assert(UndefOp0.getBitWidth() == NumElts &&
2253          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2254 
2255   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2256                                    const APInt &UndefVals) {
2257     if (UndefVals[Index])
2258       return DAG.getUNDEF(EltVT);
2259 
2260     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2261       // Try hard to make sure that the getNode() call is not creating temporary
2262       // nodes. Ignore opaque integers because they do not constant fold.
2263       SDValue Elt = BV->getOperand(Index);
2264       auto *C = dyn_cast<ConstantSDNode>(Elt);
2265       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2266         return Elt;
2267     }
2268 
2269     return SDValue();
2270   };
2271 
2272   APInt KnownUndef = APInt::getNullValue(NumElts);
2273   for (unsigned i = 0; i != NumElts; ++i) {
2274     // If both inputs for this element are either constant or undef and match
2275     // the element type, compute the constant/undef result for this element of
2276     // the vector.
2277     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2278     // not handle FP constants. The code within getNode() should be refactored
2279     // to avoid the danger of creating a bogus temporary node here.
2280     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2281     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2282     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2283       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2284         KnownUndef.setBit(i);
2285   }
2286   return KnownUndef;
2287 }
2288 
/// Attempt to simplify vector operation \p Op given that only the elements in
/// \p OriginalDemandedElts are used by callers. Returns true if the DAG was
/// changed (via \p TLO); otherwise \p KnownUndef / \p KnownZero are populated
/// with per-element facts about the result.
bool TargetLowering::SimplifyDemandedVectorElts(
    SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  APInt DemandedElts = OriginalDemandedElts;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");

  KnownUndef = KnownZero = APInt::getNullValue(NumElts);

  // TODO: For now we assume we know nothing about scalable vectors.
  if (VT.isScalableVector())
    return false;

  assert(VT.getVectorNumElements() == NumElts &&
         "Mask size mismatches value type element count!");

  // Undef operand.
  if (Op.isUndef()) {
    KnownUndef.setAllBits();
    return false;
  }

  // If Op has other users, assume that all elements are needed.
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
    DemandedElts.setAllBits();

  // Not demanding any elements from Op.
  if (DemandedElts == 0) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  // Helper for demanding the specified elements and all the bits of both binary
  // operands.
  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
    SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    if (NewOp0 || NewOp1) {
      SDValue NewOp = TLO.DAG.getNode(
          Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
      return TLO.CombineTo(Op, NewOp);
    }
    return false;
  };

  switch (Opcode) {
  case ISD::SCALAR_TO_VECTOR: {
    // Only element 0 is defined; every other element is implicitly undef.
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    }
    KnownUndef.setHighBits(NumElts - 1);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();

    // We only handle vectors here.
    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
    if (!SrcVT.isVector())
      break;

    // Fast handling of 'identity' bitcasts.
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    if (NumSrcElts == NumElts)
      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
                                        KnownZero, TLO, Depth + 1);

    APInt SrcZero, SrcUndef;
    APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);

    // Bitcast from 'large element' src vector to 'small element' vector, we
    // must demand a source element if any DemandedElt maps to it.
    if ((NumElts % NumSrcElts) == 0) {
      unsigned Scale = NumElts / NumSrcElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBit(i / Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // Try calling SimplifyDemandedBits, converting demanded elts to the bits
      // of the large element.
      // TODO - bigendian once we have test coverage.
      if (TLO.DAG.getDataLayout().isLittleEndian()) {
        unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
        APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
        for (unsigned i = 0; i != NumElts; ++i)
          if (DemandedElts[i]) {
            unsigned Ofs = (i % Scale) * EltSizeInBits;
            SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
          }

        KnownBits Known;
        if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
                                 TLO, Depth + 1))
          return true;
      }

      // If the src element is zero/undef then all the output elements will be -
      // only demanded elements are guaranteed to be correct.
      for (unsigned i = 0; i != NumSrcElts; ++i) {
        if (SrcDemandedElts[i]) {
          if (SrcZero[i])
            KnownZero.setBits(i * Scale, (i + 1) * Scale);
          if (SrcUndef[i])
            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
        }
      }
    }

    // Bitcast from 'small element' src vector to 'large element' vector, we
    // demand all smaller source elements covered by the larger demanded element
    // of this vector.
    if ((NumSrcElts % NumElts) == 0) {
      unsigned Scale = NumSrcElts / NumElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // If all the src elements covering an output element are zero/undef, then
      // the output element will be as well, assuming it was demanded.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i]) {
          if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownZero.setBit(i);
          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownUndef.setBit(i);
        }
      }
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    // Check all elements and simplify any unused elements with UNDEF.
    if (!DemandedElts.isAllOnesValue()) {
      // Don't simplify BROADCASTS.
      if (llvm::any_of(Op->op_values(),
                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
        bool Updated = false;
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i] && !Ops[i].isUndef()) {
            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
            KnownUndef.setBit(i);
            Updated = true;
          }
        }
        if (Updated)
          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
      }
    }
    // Record per-element undef/zero facts from the (remaining) operands.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue SrcOp = Op.getOperand(i);
      if (SrcOp.isUndef()) {
        KnownUndef.setBit(i);
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
                 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
        KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    // Simplify each subvector operand against its slice of the demanded
    // elements, then stitch the per-subvector facts back together.
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      SDValue SubOp = Op.getOperand(i);
      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      APInt SubUndef, SubZero;
      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
                                     Depth + 1))
        return true;
      KnownUndef.insertBits(SubUndef, i * NumSubElts);
      KnownZero.insertBits(SubZero, i * NumSubElts);
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src its
    // inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);

    APInt SubUndef, SubZero;
    if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
                                   Depth + 1))
      return true;

    // If none of the src operand elements are demanded, replace it with undef.
    if (!DemandedSrcElts && !Src.isUndef())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                                               TLO.DAG.getUNDEF(VT), Sub,
                                               Op.getOperand(2)));

    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;
    KnownUndef.insertBits(SubUndef, Idx);
    KnownZero.insertBits(SubZero, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcElts.isAllOnesValue() ||
        !DemandedSubElts.isAllOnesValue()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
          Sub, DemandedSubElts, TLO.DAG, Depth + 1);
      if (NewSrc || NewSub) {
        NewSrc = NewSrc ? NewSrc : Src;
        NewSub = NewSub ? NewSub : Sub;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        NewSub, Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownUndef = SrcUndef.extractBits(NumElts, Idx);
    KnownZero = SrcZero.extractBits(NumElts, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnesValue()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));

    // For a legal, constant insertion index, if we don't need this insertion
    // then strip it, else remove it from the demanded elts.
    if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
      unsigned Idx = CIdx->getZExtValue();
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);

      APInt DemandedVecElts(DemandedElts);
      DemandedVecElts.clearBit(Idx);
      if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
                                     KnownZero, TLO, Depth + 1))
        return true;

      // The inserted element overrides whatever was known about that lane.
      KnownUndef.clearBit(Idx);
      if (Scl.isUndef())
        KnownUndef.setBit(Idx);

      KnownZero.clearBit(Idx);
      if (isNullConstant(Scl) || isNullFPConstant(Scl))
        KnownZero.setBit(Idx);
      break;
    }

    APInt VecUndef, VecZero;
    if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
                                   Depth + 1))
      return true;
    // Without knowing the insertion index we can't set KnownUndef/KnownZero.
    break;
  }
  case ISD::VSELECT: {
    // Try to transform the select condition based on the current demanded
    // elements.
    // TODO: If a condition element is undef, we can choose from one arm of the
    //       select (and if one arm is undef, then we can propagate that to the
    //       result).
    // TODO - add support for constant vselect masks (see IR version of this).
    APInt UnusedUndef, UnusedZero;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
                                   UnusedZero, TLO, Depth + 1))
      return true;

    // See if we can simplify either vselect operand.
    APInt DemandedLHS(DemandedElts);
    APInt DemandedRHS(DemandedElts);
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
      return true;

    // A result lane is undef/zero only if it is so in both select arms.
    KnownUndef = UndefLHS & UndefRHS;
    KnownZero = ZeroLHS & ZeroRHS;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands..
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    // See if we can simplify either shuffle operand.
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
      return true;

    // Simplify mask using undef elements from LHS/RHS.
    bool Updated = false;
    bool IdentityLHS = true, IdentityRHS = true;
    SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
    for (unsigned i = 0; i != NumElts; ++i) {
      int &M = NewMask[i];
      if (M < 0)
        continue;
      // Turn undemanded or known-undef source lanes into undef mask entries.
      if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
          (M >= (int)NumElts && UndefRHS[M - NumElts])) {
        Updated = true;
        M = -1;
      }
      IdentityLHS &= (M < 0) || (M == (int)i);
      IdentityRHS &= (M < 0) || ((M - NumElts) == i);
    }

    // Update legal shuffle masks based on demanded elements if it won't reduce
    // to Identity which can cause premature removal of the shuffle mask.
    if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
      SDValue LegalShuffle =
          buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
                                  NewMask, TLO.DAG);
      if (LegalShuffle)
        return TLO.CombineTo(Op, LegalShuffle);
    }

    // Propagate undef/zero elements from LHS/RHS.
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0) {
        KnownUndef.setBit(i);
      } else if (M < (int)NumElts) {
        if (UndefLHS[M])
          KnownUndef.setBit(i);
        if (ZeroLHS[M])
          KnownZero.setBit(i);
      } else {
        if (UndefRHS[M - NumElts])
          KnownUndef.setBit(i);
        if (ZeroRHS[M - NumElts])
          KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    APInt SrcUndef, SrcZero;
    SDValue Src = Op.getOperand(0);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownZero = SrcZero.zextOrTrunc(NumElts);
    KnownUndef = SrcUndef.zextOrTrunc(NumElts);

    if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
        Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
        DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
      // aext - if we just need the bottom element then we can bitcast.
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
    }

    if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();
    }
    break;
  }

  // TODO: There are more binop opcodes that could be handled here - MIN,
  // MAX, saturated math, etc.
  case ISD::OR:
  case ISD::XOR:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    KnownZero = ZeroLHS & ZeroRHS;
    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    // Only the shifted value's zeros propagate; the amount operand does not
    // contribute zeros to the result.
    KnownZero = ZeroLHS;
    KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::MUL:
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;

    // If either side has a zero element, then the result element is zero, even
    // if the other is an UNDEF.
    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
    // and then handle 'and' nodes with the rest of the binop opcodes.
    KnownZero |= SrcZero;
    KnownUndef &= SrcUndef;
    KnownUndef &= ~KnownZero;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::TRUNCATE:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
                                   KnownZero, TLO, Depth + 1))
      return true;

    if (Op.getOpcode() == ISD::ZERO_EXTEND) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();
    }
    break;
  default: {
    // Target-specific nodes get a chance to simplify; everything else falls
    // back to demanding all bits of the demanded elements.
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
      if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
                                                  KnownZero, TLO, Depth))
        return true;
    } else {
      KnownBits Known;
      APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
      if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
                               TLO, Depth, AssumeSingleUse))
        return true;
    }
    break;
  }
  }
  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");

  // Constant fold all undef cases.
  // TODO: Handle zero cases as well.
  if (DemandedElts.isSubsetOf(KnownUndef))
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

  return false;
}
2849 
2850 /// Determine which of the bits specified in Mask are known to be either zero or
2851 /// one and return them in the Known.
2852 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2853                                                    KnownBits &Known,
2854                                                    const APInt &DemandedElts,
2855                                                    const SelectionDAG &DAG,
2856                                                    unsigned Depth) const {
2857   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2858           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2859           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2860           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2861          "Should use MaskedValueIsZero if you don't know whether Op"
2862          " is a target node!");
2863   Known.resetAll();
2864 }
2865 
2866 void TargetLowering::computeKnownBitsForTargetInstr(
2867     GISelKnownBits &Analysis, Register R, KnownBits &Known,
2868     const APInt &DemandedElts, const MachineRegisterInfo &MRI,
2869     unsigned Depth) const {
2870   Known.resetAll();
2871 }
2872 
2873 void TargetLowering::computeKnownBitsForFrameIndex(
2874   const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
2875   // The low bits are known zero if the pointer is aligned.
2876   Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
2877 }
2878 
2879 Align TargetLowering::computeKnownAlignForTargetInstr(
2880   GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
2881   unsigned Depth) const {
2882   return Align(1);
2883 }
2884 
2885 /// This method can be implemented by targets that want to expose additional
2886 /// information about sign bits to the DAG Combiner.
2887 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2888                                                          const APInt &,
2889                                                          const SelectionDAG &,
2890                                                          unsigned Depth) const {
2891   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2892           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2893           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2894           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2895          "Should use ComputeNumSignBits if you don't know whether Op"
2896          " is a target node!");
2897   return 1;
2898 }
2899 
2900 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
2901   GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
2902   const MachineRegisterInfo &MRI, unsigned Depth) const {
2903   return 1;
2904 }
2905 
2906 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
2907     SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
2908     TargetLoweringOpt &TLO, unsigned Depth) const {
2909   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2910           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2911           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2912           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2913          "Should use SimplifyDemandedVectorElts if you don't know whether Op"
2914          " is a target node!");
2915   return false;
2916 }
2917 
2918 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
2919     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2920     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
2921   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2922           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2923           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2924           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2925          "Should use SimplifyDemandedBits if you don't know whether Op"
2926          " is a target node!");
2927   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
2928   return false;
2929 }
2930 
2931 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
2932     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2933     SelectionDAG &DAG, unsigned Depth) const {
2934   assert(
2935       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2936        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2937        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2938        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2939       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
2940       " is a target node!");
2941   return SDValue();
2942 }
2943 
2944 SDValue
2945 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
2946                                         SDValue N1, MutableArrayRef<int> Mask,
2947                                         SelectionDAG &DAG) const {
2948   bool LegalMask = isShuffleMaskLegal(Mask, VT);
2949   if (!LegalMask) {
2950     std::swap(N0, N1);
2951     ShuffleVectorSDNode::commuteMask(Mask);
2952     LegalMask = isShuffleMaskLegal(Mask, VT);
2953   }
2954 
2955   if (!LegalMask)
2956     return SDValue();
2957 
2958   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
2959 }
2960 
// Default implementation: no constant is known for the load. Targets that
// materialize constants through loads (e.g. from a constant pool) override
// this to expose the underlying Constant to DAG combines.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
2964 
2965 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
2966                                                   const SelectionDAG &DAG,
2967                                                   bool SNaN,
2968                                                   unsigned Depth) const {
2969   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2970           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2971           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2972           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2973          "Should use isKnownNeverNaN if you don't know whether Op"
2974          " is a target node!");
2975   return false;
2976 }
2977 
2978 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
2979 // work with truncating build vectors and vectors with elements of less than
2980 // 8 bits.
2981 bool TargetLowering::isConstTrueVal(const SDNode *N) const {
2982   if (!N)
2983     return false;
2984 
2985   APInt CVal;
2986   if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2987     CVal = CN->getAPIntValue();
2988   } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
2989     auto *CN = BV->getConstantSplatNode();
2990     if (!CN)
2991       return false;
2992 
2993     // If this is a truncating build vector, truncate the splat value.
2994     // Otherwise, we may fail to match the expected values below.
2995     unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
2996     CVal = CN->getAPIntValue();
2997     if (BVEltWidth < CVal.getBitWidth())
2998       CVal = CVal.trunc(BVEltWidth);
2999   } else {
3000     return false;
3001   }
3002 
3003   switch (getBooleanContents(N->getValueType(0))) {
3004   case UndefinedBooleanContent:
3005     return CVal[0];
3006   case ZeroOrOneBooleanContent:
3007     return CVal.isOneValue();
3008   case ZeroOrNegativeOneBooleanContent:
3009     return CVal.isAllOnesValue();
3010   }
3011 
3012   llvm_unreachable("Invalid boolean contents");
3013 }
3014 
3015 bool TargetLowering::isConstFalseVal(const SDNode *N) const {
3016   if (!N)
3017     return false;
3018 
3019   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3020   if (!CN) {
3021     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3022     if (!BV)
3023       return false;
3024 
3025     // Only interested in constant splats, we don't care about undef
3026     // elements in identifying boolean constants and getConstantSplatNode
3027     // returns NULL if all ops are undef;
3028     CN = BV->getConstantSplatNode();
3029     if (!CN)
3030       return false;
3031   }
3032 
3033   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3034     return !CN->getAPIntValue()[0];
3035 
3036   return CN->isNullValue();
3037 }
3038 
3039 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3040                                        bool SExt) const {
3041   if (VT == MVT::i1)
3042     return N->isOne();
3043 
3044   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3045   switch (Cnt) {
3046   case TargetLowering::ZeroOrOneBooleanContent:
3047     // An extended value of 1 is always true, unless its original type is i1,
3048     // in which case it will be sign extended to -1.
3049     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3050   case TargetLowering::UndefinedBooleanContent:
3051   case TargetLowering::ZeroOrNegativeOneBooleanContent:
3052     return N->isAllOnesValue() && SExt;
3053   }
3054   llvm_unreachable("Unexpected enumeration.");
3055 }
3056 
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
///
/// Only integer equality/inequality comparisons (SETEQ/SETNE) are handled;
/// returns a null SDValue if no fold applies.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  // Canonicalize so the 'and' (if any) is on the LHS.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  EVT OpVT = N0.getValueType();
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // Identify which 'and' operand is the repeated value Y (== N1); the other
  // operand becomes X.
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  SelectionDAG &DAG = DCI.DAG;
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    auto *YConst = dyn_cast<ConstantSDNode>(Y);
    if (YConst && YConst->isNullValue())
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
3117 
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned range check onto an eq/ne check, canonicalizing the
  // setcc constant so that ult/ule (resp. ugt/uge) compare against the same
  // power of two.
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  // The add constant must be exactly half the setcc constant, i.e.
  // 1 << (KeptBits-1), for this to be a signed-truncation check.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into:  ((%x << C) a>> C) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);

  return T2;
}
3215 
// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
//
// Hoists the constant C out of a variable logical shift inside an 'and' by
// applying the opposite shift to X instead, so the 'and' mask becomes a plain
// constant. N1C must be a zero constant (asserted below) and Cond must be an
// equality comparison. Returns a null SDValue if the pattern doesn't match or
// the target declines the transform.
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) &&
         isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Look for '(C l>>/<< Y)'.
  // On success sets C, Y and NewShiftOpcode (the opposite of the matched
  // shift), and lets the target veto the transform.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
3286 
3287 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3288 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3289 /// handle the commuted versions of these patterns.
3290 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3291                                            ISD::CondCode Cond, const SDLoc &DL,
3292                                            DAGCombinerInfo &DCI) const {
3293   unsigned BOpcode = N0.getOpcode();
3294   assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3295          "Unexpected binop");
3296   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3297 
3298   // (X + Y) == X --> Y == 0
3299   // (X - Y) == X --> Y == 0
3300   // (X ^ Y) == X --> Y == 0
3301   SelectionDAG &DAG = DCI.DAG;
3302   EVT OpVT = N0.getValueType();
3303   SDValue X = N0.getOperand(0);
3304   SDValue Y = N0.getOperand(1);
3305   if (X == N1)
3306     return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3307 
3308   if (Y != N1)
3309     return SDValue();
3310 
3311   // (X + Y) == Y --> X == 0
3312   // (X ^ Y) == Y --> X == 0
3313   if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3314     return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3315 
3316   // The shift would not be valid if the operands are boolean (i1).
3317   if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3318     return SDValue();
3319 
3320   // (X - Y) == Y --> X == Y << 1
3321   EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3322                                  !DCI.isBeforeLegalize());
3323   SDValue One = DAG.getConstant(1, DL, ShiftVT);
3324   SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3325   if (!DCI.isCalledByLegalizer())
3326     DCI.AddToWorklist(YShl1.getNode());
3327   return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3328 }
3329 
3330 /// Try to simplify a setcc built with the specified operands and cc. If it is
3331 /// unable to simplify it, return a null SDValue.
3332 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
3333                                       ISD::CondCode Cond, bool foldBooleans,
3334                                       DAGCombinerInfo &DCI,
3335                                       const SDLoc &dl) const {
3336   SelectionDAG &DAG = DCI.DAG;
3337   const DataLayout &Layout = DAG.getDataLayout();
3338   EVT OpVT = N0.getValueType();
3339 
3340   // Constant fold or commute setcc.
3341   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
3342     return Fold;
3343 
3344   // Ensure that the constant occurs on the RHS and fold constant comparisons.
3345   // TODO: Handle non-splat vector constants. All undef causes trouble.
3346   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
3347   if (isConstOrConstSplat(N0) &&
3348       (DCI.isBeforeLegalizeOps() ||
3349        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
3350     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3351 
3352   // If we have a subtract with the same 2 non-constant operands as this setcc
3353   // -- but in reverse order -- then try to commute the operands of this setcc
3354   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
3355   // instruction on some targets.
3356   if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
3357       (DCI.isBeforeLegalizeOps() ||
3358        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
3359       DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
3360       !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
3361     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3362 
3363   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3364     const APInt &C1 = N1C->getAPIntValue();
3365 
3366     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
3367     // equality comparison, then we're just comparing whether X itself is
3368     // zero.
3369     if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
3370         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
3371         N0.getOperand(1).getOpcode() == ISD::Constant) {
3372       const APInt &ShAmt = N0.getConstantOperandAPInt(1);
3373       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3374           ShAmt == Log2_32(N0.getValueSizeInBits())) {
3375         if ((C1 == 0) == (Cond == ISD::SETEQ)) {
3376           // (srl (ctlz x), 5) == 0  -> X != 0
3377           // (srl (ctlz x), 5) != 1  -> X != 0
3378           Cond = ISD::SETNE;
3379         } else {
3380           // (srl (ctlz x), 5) != 0  -> X == 0
3381           // (srl (ctlz x), 5) == 1  -> X == 0
3382           Cond = ISD::SETEQ;
3383         }
3384         SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
3385         return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
3386                             Zero, Cond);
3387       }
3388     }
3389 
3390     SDValue CTPOP = N0;
3391     // Look through truncs that don't change the value of a ctpop.
3392     if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
3393       CTPOP = N0.getOperand(0);
3394 
3395     if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
3396         (N0 == CTPOP ||
3397          N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
3398       EVT CTVT = CTPOP.getValueType();
3399       SDValue CTOp = CTPOP.getOperand(0);
3400 
3401       // (ctpop x) u< 2 -> (x & x-1) == 0
3402       // (ctpop x) u> 1 -> (x & x-1) != 0
3403       if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
3404         SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3405         SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3406         SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3407         ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
3408         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
3409       }
3410 
3411       // If ctpop is not supported, expand a power-of-2 comparison based on it.
3412       if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
3413           (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3414         // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
3415         // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
3416         SDValue Zero = DAG.getConstant(0, dl, CTVT);
3417         SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3418         assert(CTVT.isInteger());
3419         ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
3420         SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3421         SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3422         SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
3423         SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
3424         unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
3425         return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
3426       }
3427     }
3428 
3429     // (zext x) == C --> x == (trunc C)
3430     // (sext x) == C --> x == (trunc C)
3431     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3432         DCI.isBeforeLegalize() && N0->hasOneUse()) {
3433       unsigned MinBits = N0.getValueSizeInBits();
3434       SDValue PreExt;
3435       bool Signed = false;
3436       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
3437         // ZExt
3438         MinBits = N0->getOperand(0).getValueSizeInBits();
3439         PreExt = N0->getOperand(0);
3440       } else if (N0->getOpcode() == ISD::AND) {
3441         // DAGCombine turns costly ZExts into ANDs
3442         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
3443           if ((C->getAPIntValue()+1).isPowerOf2()) {
3444             MinBits = C->getAPIntValue().countTrailingOnes();
3445             PreExt = N0->getOperand(0);
3446           }
3447       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
3448         // SExt
3449         MinBits = N0->getOperand(0).getValueSizeInBits();
3450         PreExt = N0->getOperand(0);
3451         Signed = true;
3452       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
3453         // ZEXTLOAD / SEXTLOAD
3454         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
3455           MinBits = LN0->getMemoryVT().getSizeInBits();
3456           PreExt = N0;
3457         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
3458           Signed = true;
3459           MinBits = LN0->getMemoryVT().getSizeInBits();
3460           PreExt = N0;
3461         }
3462       }
3463 
3464       // Figure out how many bits we need to preserve this constant.
3465       unsigned ReqdBits = Signed ?
3466         C1.getBitWidth() - C1.getNumSignBits() + 1 :
3467         C1.getActiveBits();
3468 
3469       // Make sure we're not losing bits from the constant.
3470       if (MinBits > 0 &&
3471           MinBits < C1.getBitWidth() &&
3472           MinBits >= ReqdBits) {
3473         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
3474         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
3475           // Will get folded away.
3476           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
3477           if (MinBits == 1 && C1 == 1)
3478             // Invert the condition.
3479             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
3480                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3481           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
3482           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
3483         }
3484 
3485         // If truncating the setcc operands is not desirable, we can still
3486         // simplify the expression in some cases:
3487         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
3488         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
3489         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
3490         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
3491         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
3492         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
3493         SDValue TopSetCC = N0->getOperand(0);
3494         unsigned N0Opc = N0->getOpcode();
3495         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
3496         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
3497             TopSetCC.getOpcode() == ISD::SETCC &&
3498             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
3499             (isConstFalseVal(N1C) ||
3500              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
3501 
3502           bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
3503                          (!N1C->isNullValue() && Cond == ISD::SETNE);
3504 
3505           if (!Inverse)
3506             return TopSetCC;
3507 
3508           ISD::CondCode InvCond = ISD::getSetCCInverse(
3509               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
3510               TopSetCC.getOperand(0).getValueType());
3511           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
3512                                       TopSetCC.getOperand(1),
3513                                       InvCond);
3514         }
3515       }
3516     }
3517 
3518     // If the LHS is '(and load, const)', the RHS is 0, the test is for
3519     // equality or unsigned, and all 1 bits of the const are in the same
3520     // partial word, see if we can shorten the load.
3521     if (DCI.isBeforeLegalize() &&
3522         !ISD::isSignedIntSetCC(Cond) &&
3523         N0.getOpcode() == ISD::AND && C1 == 0 &&
3524         N0.getNode()->hasOneUse() &&
3525         isa<LoadSDNode>(N0.getOperand(0)) &&
3526         N0.getOperand(0).getNode()->hasOneUse() &&
3527         isa<ConstantSDNode>(N0.getOperand(1))) {
3528       LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
3529       APInt bestMask;
3530       unsigned bestWidth = 0, bestOffset = 0;
3531       if (Lod->isSimple() && Lod->isUnindexed()) {
3532         unsigned origWidth = N0.getValueSizeInBits();
3533         unsigned maskWidth = origWidth;
3534         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
3535         // 8 bits, but have to be careful...
3536         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
3537           origWidth = Lod->getMemoryVT().getSizeInBits();
3538         const APInt &Mask = N0.getConstantOperandAPInt(1);
3539         for (unsigned width = origWidth / 2; width>=8; width /= 2) {
3540           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
3541           for (unsigned offset=0; offset<origWidth/width; offset++) {
3542             if (Mask.isSubsetOf(newMask)) {
3543               if (Layout.isLittleEndian())
3544                 bestOffset = (uint64_t)offset * (width/8);
3545               else
3546                 bestOffset = (origWidth/width - offset - 1) * (width/8);
3547               bestMask = Mask.lshr(offset * (width/8) * 8);
3548               bestWidth = width;
3549               break;
3550             }
3551             newMask <<= width;
3552           }
3553         }
3554       }
3555       if (bestWidth) {
3556         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
3557         if (newVT.isRound() &&
3558             shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
3559           SDValue Ptr = Lod->getBasePtr();
3560           if (bestOffset != 0)
3561             Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl);
3562           unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
3563           SDValue NewLoad = DAG.getLoad(
3564               newVT, dl, Lod->getChain(), Ptr,
3565               Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
3566           return DAG.getSetCC(dl, VT,
3567                               DAG.getNode(ISD::AND, dl, newVT, NewLoad,
3568                                       DAG.getConstant(bestMask.trunc(bestWidth),
3569                                                       dl, newVT)),
3570                               DAG.getConstant(0LL, dl, newVT), Cond);
3571         }
3572       }
3573     }
3574 
3575     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
3576     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
3577       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
3578 
3579       // If the comparison constant has bits in the upper part, the
3580       // zero-extended value could never match.
3581       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
3582                                               C1.getBitWidth() - InSize))) {
3583         switch (Cond) {
3584         case ISD::SETUGT:
3585         case ISD::SETUGE:
3586         case ISD::SETEQ:
3587           return DAG.getConstant(0, dl, VT);
3588         case ISD::SETULT:
3589         case ISD::SETULE:
3590         case ISD::SETNE:
3591           return DAG.getConstant(1, dl, VT);
3592         case ISD::SETGT:
3593         case ISD::SETGE:
3594           // True if the sign bit of C1 is set.
3595           return DAG.getConstant(C1.isNegative(), dl, VT);
3596         case ISD::SETLT:
3597         case ISD::SETLE:
3598           // True if the sign bit of C1 isn't set.
3599           return DAG.getConstant(C1.isNonNegative(), dl, VT);
3600         default:
3601           break;
3602         }
3603       }
3604 
3605       // Otherwise, we can perform the comparison with the low bits.
3606       switch (Cond) {
3607       case ISD::SETEQ:
3608       case ISD::SETNE:
3609       case ISD::SETUGT:
3610       case ISD::SETUGE:
3611       case ISD::SETULT:
3612       case ISD::SETULE: {
3613         EVT newVT = N0.getOperand(0).getValueType();
3614         if (DCI.isBeforeLegalizeOps() ||
3615             (isOperationLegal(ISD::SETCC, newVT) &&
3616              isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
3617           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
3618           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
3619 
3620           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
3621                                           NewConst, Cond);
3622           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
3623         }
3624         break;
3625       }
3626       default:
3627         break; // todo, be more careful with signed comparisons
3628       }
3629     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3630                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3631       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
3632       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
3633       EVT ExtDstTy = N0.getValueType();
3634       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
3635 
3636       // If the constant doesn't fit into the number of bits for the source of
3637       // the sign extension, it is impossible for both sides to be equal.
3638       if (C1.getMinSignedBits() > ExtSrcTyBits)
3639         return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
3640 
3641       SDValue ZextOp;
3642       EVT Op0Ty = N0.getOperand(0).getValueType();
3643       if (Op0Ty == ExtSrcTy) {
3644         ZextOp = N0.getOperand(0);
3645       } else {
3646         APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
3647         ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
3648                              DAG.getConstant(Imm, dl, Op0Ty));
3649       }
3650       if (!DCI.isCalledByLegalizer())
3651         DCI.AddToWorklist(ZextOp.getNode());
3652       // Otherwise, make this a use of a zext.
3653       return DAG.getSetCC(dl, VT, ZextOp,
3654                           DAG.getConstant(C1 & APInt::getLowBitsSet(
3655                                                               ExtDstTyBits,
3656                                                               ExtSrcTyBits),
3657                                           dl, ExtDstTy),
3658                           Cond);
3659     } else if ((N1C->isNullValue() || N1C->isOne()) &&
3660                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3661       // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
3662       if (N0.getOpcode() == ISD::SETCC &&
3663           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
3664           (N0.getValueType() == MVT::i1 ||
3665            getBooleanContents(N0.getOperand(0).getValueType()) ==
3666                        ZeroOrOneBooleanContent)) {
3667         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
3668         if (TrueWhenTrue)
3669           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
3670         // Invert the condition.
3671         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
3672         CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
3673         if (DCI.isBeforeLegalizeOps() ||
3674             isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
3675           return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
3676       }
3677 
3678       if ((N0.getOpcode() == ISD::XOR ||
3679            (N0.getOpcode() == ISD::AND &&
3680             N0.getOperand(0).getOpcode() == ISD::XOR &&
3681             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
3682           isa<ConstantSDNode>(N0.getOperand(1)) &&
3683           cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
3684         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
3685         // can only do this if the top bits are known zero.
3686         unsigned BitWidth = N0.getValueSizeInBits();
3687         if (DAG.MaskedValueIsZero(N0,
3688                                   APInt::getHighBitsSet(BitWidth,
3689                                                         BitWidth-1))) {
3690           // Okay, get the un-inverted input value.
3691           SDValue Val;
3692           if (N0.getOpcode() == ISD::XOR) {
3693             Val = N0.getOperand(0);
3694           } else {
3695             assert(N0.getOpcode() == ISD::AND &&
3696                     N0.getOperand(0).getOpcode() == ISD::XOR);
3697             // ((X^1)&1)^1 -> X & 1
3698             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
3699                               N0.getOperand(0).getOperand(0),
3700                               N0.getOperand(1));
3701           }
3702 
3703           return DAG.getSetCC(dl, VT, Val, N1,
3704                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3705         }
3706       } else if (N1C->isOne()) {
3707         SDValue Op0 = N0;
3708         if (Op0.getOpcode() == ISD::TRUNCATE)
3709           Op0 = Op0.getOperand(0);
3710 
3711         if ((Op0.getOpcode() == ISD::XOR) &&
3712             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
3713             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
3714           SDValue XorLHS = Op0.getOperand(0);
3715           SDValue XorRHS = Op0.getOperand(1);
3716           // Ensure that the input setccs return an i1 type or 0/1 value.
3717           if (Op0.getValueType() == MVT::i1 ||
3718               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
3719                       ZeroOrOneBooleanContent &&
3720                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
3721                         ZeroOrOneBooleanContent)) {
3722             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
3723             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
3724             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
3725           }
3726         }
3727         if (Op0.getOpcode() == ISD::AND &&
3728             isa<ConstantSDNode>(Op0.getOperand(1)) &&
3729             cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
3730           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
3731           if (Op0.getValueType().bitsGT(VT))
3732             Op0 = DAG.getNode(ISD::AND, dl, VT,
3733                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
3734                           DAG.getConstant(1, dl, VT));
3735           else if (Op0.getValueType().bitsLT(VT))
3736             Op0 = DAG.getNode(ISD::AND, dl, VT,
3737                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
3738                         DAG.getConstant(1, dl, VT));
3739 
3740           return DAG.getSetCC(dl, VT, Op0,
3741                               DAG.getConstant(0, dl, Op0.getValueType()),
3742                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3743         }
3744         if (Op0.getOpcode() == ISD::AssertZext &&
3745             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
3746           return DAG.getSetCC(dl, VT, Op0,
3747                               DAG.getConstant(0, dl, Op0.getValueType()),
3748                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3749       }
3750     }
3751 
3752     // Given:
3753     //   icmp eq/ne (urem %x, %y), 0
3754     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
3755     //   icmp eq/ne %x, 0
3756     if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
3757         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3758       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
3759       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
3760       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
3761         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
3762     }
3763 
3764     if (SDValue V =
3765             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
3766       return V;
3767   }
3768 
3769   // These simplifications apply to splat vectors as well.
3770   // TODO: Handle more splat vector cases.
3771   if (auto *N1C = isConstOrConstSplat(N1)) {
3772     const APInt &C1 = N1C->getAPIntValue();
3773 
3774     APInt MinVal, MaxVal;
3775     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
3776     if (ISD::isSignedIntSetCC(Cond)) {
3777       MinVal = APInt::getSignedMinValue(OperandBitSize);
3778       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
3779     } else {
3780       MinVal = APInt::getMinValue(OperandBitSize);
3781       MaxVal = APInt::getMaxValue(OperandBitSize);
3782     }
3783 
3784     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
3785     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
3786       // X >= MIN --> true
3787       if (C1 == MinVal)
3788         return DAG.getBoolConstant(true, dl, VT, OpVT);
3789 
3790       if (!VT.isVector()) { // TODO: Support this for vectors.
3791         // X >= C0 --> X > (C0 - 1)
3792         APInt C = C1 - 1;
3793         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
3794         if ((DCI.isBeforeLegalizeOps() ||
3795              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3796             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3797                                   isLegalICmpImmediate(C.getSExtValue())))) {
3798           return DAG.getSetCC(dl, VT, N0,
3799                               DAG.getConstant(C, dl, N1.getValueType()),
3800                               NewCC);
3801         }
3802       }
3803     }
3804 
3805     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
3806       // X <= MAX --> true
3807       if (C1 == MaxVal)
3808         return DAG.getBoolConstant(true, dl, VT, OpVT);
3809 
3810       // X <= C0 --> X < (C0 + 1)
3811       if (!VT.isVector()) { // TODO: Support this for vectors.
3812         APInt C = C1 + 1;
3813         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
3814         if ((DCI.isBeforeLegalizeOps() ||
3815              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3816             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3817                                   isLegalICmpImmediate(C.getSExtValue())))) {
3818           return DAG.getSetCC(dl, VT, N0,
3819                               DAG.getConstant(C, dl, N1.getValueType()),
3820                               NewCC);
3821         }
3822       }
3823     }
3824 
3825     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
3826       if (C1 == MinVal)
3827         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
3828 
3829       // TODO: Support this for vectors after legalize ops.
3830       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3831         // Canonicalize setlt X, Max --> setne X, Max
3832         if (C1 == MaxVal)
3833           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3834 
3835         // If we have setult X, 1, turn it into seteq X, 0
3836         if (C1 == MinVal+1)
3837           return DAG.getSetCC(dl, VT, N0,
3838                               DAG.getConstant(MinVal, dl, N0.getValueType()),
3839                               ISD::SETEQ);
3840       }
3841     }
3842 
3843     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
3844       if (C1 == MaxVal)
3845         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
3846 
3847       // TODO: Support this for vectors after legalize ops.
3848       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3849         // Canonicalize setgt X, Min --> setne X, Min
3850         if (C1 == MinVal)
3851           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3852 
3853         // If we have setugt X, Max-1, turn it into seteq X, Max
3854         if (C1 == MaxVal-1)
3855           return DAG.getSetCC(dl, VT, N0,
3856                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
3857                               ISD::SETEQ);
3858       }
3859     }
3860 
3861     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
3862       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
3863       if (C1.isNullValue())
3864         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
3865                 VT, N0, N1, Cond, DCI, dl))
3866           return CC;
3867     }
3868 
3869     // If we have "setcc X, C0", check to see if we can shrink the immediate
3870     // by changing cc.
3871     // TODO: Support this for vectors after legalize ops.
3872     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3873       // SETUGT X, SINTMAX  -> SETLT X, 0
3874       if (Cond == ISD::SETUGT &&
3875           C1 == APInt::getSignedMaxValue(OperandBitSize))
3876         return DAG.getSetCC(dl, VT, N0,
3877                             DAG.getConstant(0, dl, N1.getValueType()),
3878                             ISD::SETLT);
3879 
3880       // SETULT X, SINTMIN  -> SETGT X, -1
3881       if (Cond == ISD::SETULT &&
3882           C1 == APInt::getSignedMinValue(OperandBitSize)) {
3883         SDValue ConstMinusOne =
3884             DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
3885                             N1.getValueType());
3886         return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
3887       }
3888     }
3889   }
3890 
3891   // Back to non-vector simplifications.
3892   // TODO: Can we do these for vector splats?
3893   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3894     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3895     const APInt &C1 = N1C->getAPIntValue();
3896     EVT ShValTy = N0.getValueType();
3897 
3898     // Fold bit comparisons when we can.
3899     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3900         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
3901         N0.getOpcode() == ISD::AND) {
3902       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3903         EVT ShiftTy =
3904             getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
3905         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
3906           // Perform the xform if the AND RHS is a single bit.
3907           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
3908           if (AndRHS->getAPIntValue().isPowerOf2() &&
3909               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
3910             return DAG.getNode(ISD::TRUNCATE, dl, VT,
3911                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3912                                            DAG.getConstant(ShCt, dl, ShiftTy)));
3913           }
3914         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
3915           // (X & 8) == 8  -->  (X & 8) >> 3
3916           // Perform the xform if C1 is a single bit.
3917           unsigned ShCt = C1.logBase2();
3918           if (C1.isPowerOf2() &&
3919               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
3920             return DAG.getNode(ISD::TRUNCATE, dl, VT,
3921                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3922                                            DAG.getConstant(ShCt, dl, ShiftTy)));
3923           }
3924         }
3925       }
3926     }
3927 
3928     if (C1.getMinSignedBits() <= 64 &&
3929         !isLegalICmpImmediate(C1.getSExtValue())) {
3930       EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
3931       // (X & -256) == 256 -> (X >> 8) == 1
3932       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3933           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
3934         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3935           const APInt &AndRHSC = AndRHS->getAPIntValue();
3936           if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
3937             unsigned ShiftBits = AndRHSC.countTrailingZeros();
3938             if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
3939               SDValue Shift =
3940                 DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
3941                             DAG.getConstant(ShiftBits, dl, ShiftTy));
3942               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
3943               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
3944             }
3945           }
3946         }
3947       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
3948                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
3949         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
3950         // X <  0x100000000 -> (X >> 32) <  1
3951         // X >= 0x100000000 -> (X >> 32) >= 1
3952         // X <= 0x0ffffffff -> (X >> 32) <  1
3953         // X >  0x0ffffffff -> (X >> 32) >= 1
3954         unsigned ShiftBits;
3955         APInt NewC = C1;
3956         ISD::CondCode NewCond = Cond;
3957         if (AdjOne) {
3958           ShiftBits = C1.countTrailingOnes();
3959           NewC = NewC + 1;
3960           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3961         } else {
3962           ShiftBits = C1.countTrailingZeros();
3963         }
3964         NewC.lshrInPlace(ShiftBits);
3965         if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
3966             isLegalICmpImmediate(NewC.getSExtValue()) &&
3967             !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
3968           SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3969                                       DAG.getConstant(ShiftBits, dl, ShiftTy));
3970           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
3971           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
3972         }
3973       }
3974     }
3975   }
3976 
3977   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
3978     auto *CFP = cast<ConstantFPSDNode>(N1);
3979     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
3980 
3981     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
3982     // constant if knowing that the operand is non-nan is enough.  We prefer to
3983     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
3984     // materialize 0.0.
3985     if (Cond == ISD::SETO || Cond == ISD::SETUO)
3986       return DAG.getSetCC(dl, VT, N0, N0, Cond);
3987 
3988     // setcc (fneg x), C -> setcc swap(pred) x, -C
3989     if (N0.getOpcode() == ISD::FNEG) {
3990       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
3991       if (DCI.isBeforeLegalizeOps() ||
3992           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
3993         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
3994         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
3995       }
3996     }
3997 
3998     // If the condition is not legal, see if we can find an equivalent one
3999     // which is legal.
4000     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
4001       // If the comparison was an awkward floating-point == or != and one of
4002       // the comparison operands is infinity or negative infinity, convert the
4003       // condition to a less-awkward <= or >=.
4004       if (CFP->getValueAPF().isInfinity()) {
4005         bool IsNegInf = CFP->getValueAPF().isNegative();
4006         ISD::CondCode NewCond = ISD::SETCC_INVALID;
4007         switch (Cond) {
4008         case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
4009         case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
4010         case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
4011         case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
4012         default: break;
4013         }
4014         if (NewCond != ISD::SETCC_INVALID &&
4015             isCondCodeLegal(NewCond, N0.getSimpleValueType()))
4016           return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4017       }
4018     }
4019   }
4020 
4021   if (N0 == N1) {
4022     // The sext(setcc()) => setcc() optimization relies on the appropriate
4023     // constant being emitted.
4024     assert(!N0.getValueType().isInteger() &&
4025            "Integer types should be handled by FoldSetCC");
4026 
4027     bool EqTrue = ISD::isTrueWhenEqual(Cond);
4028     unsigned UOF = ISD::getUnorderedFlavor(Cond);
4029     if (UOF == 2) // FP operators that are undefined on NaNs.
4030       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4031     if (UOF == unsigned(EqTrue))
4032       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4033     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
4034     // if it is not already.
4035     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
4036     if (NewCond != Cond &&
4037         (DCI.isBeforeLegalizeOps() ||
4038                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
4039       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4040   }
4041 
4042   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4043       N0.getValueType().isInteger()) {
4044     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
4045         N0.getOpcode() == ISD::XOR) {
4046       // Simplify (X+Y) == (X+Z) -->  Y == Z
4047       if (N0.getOpcode() == N1.getOpcode()) {
4048         if (N0.getOperand(0) == N1.getOperand(0))
4049           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
4050         if (N0.getOperand(1) == N1.getOperand(1))
4051           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
4052         if (isCommutativeBinOp(N0.getOpcode())) {
4053           // If X op Y == Y op X, try other combinations.
4054           if (N0.getOperand(0) == N1.getOperand(1))
4055             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
4056                                 Cond);
4057           if (N0.getOperand(1) == N1.getOperand(0))
4058             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
4059                                 Cond);
4060         }
4061       }
4062 
4063       // If RHS is a legal immediate value for a compare instruction, we need
4064       // to be careful about increasing register pressure needlessly.
4065       bool LegalRHSImm = false;
4066 
4067       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
4068         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4069           // Turn (X+C1) == C2 --> X == C2-C1
4070           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
4071             return DAG.getSetCC(dl, VT, N0.getOperand(0),
4072                                 DAG.getConstant(RHSC->getAPIntValue()-
4073                                                 LHSR->getAPIntValue(),
4074                                 dl, N0.getValueType()), Cond);
4075           }
4076 
4077           // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
4078           if (N0.getOpcode() == ISD::XOR)
4079             // If we know that all of the inverted bits are zero, don't bother
4080             // performing the inversion.
4081             if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
4082               return
4083                 DAG.getSetCC(dl, VT, N0.getOperand(0),
4084                              DAG.getConstant(LHSR->getAPIntValue() ^
4085                                                RHSC->getAPIntValue(),
4086                                              dl, N0.getValueType()),
4087                              Cond);
4088         }
4089 
4090         // Turn (C1-X) == C2 --> X == C1-C2
4091         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
4092           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
4093             return
4094               DAG.getSetCC(dl, VT, N0.getOperand(1),
4095                            DAG.getConstant(SUBC->getAPIntValue() -
4096                                              RHSC->getAPIntValue(),
4097                                            dl, N0.getValueType()),
4098                            Cond);
4099           }
4100         }
4101 
4102         // Could RHSC fold directly into a compare?
4103         if (RHSC->getValueType(0).getSizeInBits() <= 64)
4104           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
4105       }
4106 
4107       // (X+Y) == X --> Y == 0 and similar folds.
4108       // Don't do this if X is an immediate that can fold into a cmp
4109       // instruction and X+Y has other uses. It could be an induction variable
4110       // chain, and the transform would increase register pressure.
4111       if (!LegalRHSImm || N0.hasOneUse())
4112         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
4113           return V;
4114     }
4115 
4116     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
4117         N1.getOpcode() == ISD::XOR)
4118       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
4119         return V;
4120 
4121     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
4122       return V;
4123   }
4124 
4125   // Fold remainder of division by a constant.
4126   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
4127       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4128     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4129 
4130     // When division is cheap or optimizing for minimum size,
4131     // fall through to DIVREM creation by skipping this fold.
4132     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
4133       if (N0.getOpcode() == ISD::UREM) {
4134         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
4135           return Folded;
4136       } else if (N0.getOpcode() == ISD::SREM) {
4137         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
4138           return Folded;
4139       }
4140     }
4141   }
4142 
4143   // Fold away ALL boolean setcc's.
4144   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
4145     SDValue Temp;
4146     switch (Cond) {
4147     default: llvm_unreachable("Unknown integer setcc!");
4148     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
4149       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4150       N0 = DAG.getNOT(dl, Temp, OpVT);
4151       if (!DCI.isCalledByLegalizer())
4152         DCI.AddToWorklist(Temp.getNode());
4153       break;
4154     case ISD::SETNE:  // X != Y   -->  (X^Y)
4155       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4156       break;
4157     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
4158     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
4159       Temp = DAG.getNOT(dl, N0, OpVT);
4160       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
4161       if (!DCI.isCalledByLegalizer())
4162         DCI.AddToWorklist(Temp.getNode());
4163       break;
4164     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
4165     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
4166       Temp = DAG.getNOT(dl, N1, OpVT);
4167       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
4168       if (!DCI.isCalledByLegalizer())
4169         DCI.AddToWorklist(Temp.getNode());
4170       break;
4171     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
4172     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
4173       Temp = DAG.getNOT(dl, N0, OpVT);
4174       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
4175       if (!DCI.isCalledByLegalizer())
4176         DCI.AddToWorklist(Temp.getNode());
4177       break;
4178     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
4179     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
4180       Temp = DAG.getNOT(dl, N1, OpVT);
4181       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
4182       break;
4183     }
4184     if (VT.getScalarType() != MVT::i1) {
4185       if (!DCI.isCalledByLegalizer())
4186         DCI.AddToWorklist(N0.getNode());
4187       // FIXME: If running after legalize, we probably can't do this.
4188       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
4189       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
4190     }
4191     return N0;
4192   }
4193 
4194   // Could not fold it.
4195   return SDValue();
4196 }
4197 
4198 /// Returns true (and the GlobalValue and the offset) if the node is a
4199 /// GlobalAddress + offset.
4200 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
4201                                     int64_t &Offset) const {
4202 
4203   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
4204 
4205   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
4206     GA = GASD->getGlobal();
4207     Offset += GASD->getOffset();
4208     return true;
4209   }
4210 
4211   if (N->getOpcode() == ISD::ADD) {
4212     SDValue N1 = N->getOperand(0);
4213     SDValue N2 = N->getOperand(1);
4214     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
4215       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
4216         Offset += V->getSExtValue();
4217         return true;
4218       }
4219     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
4220       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
4221         Offset += V->getSExtValue();
4222         return true;
4223       }
4224     }
4225   }
4226 
4227   return false;
4228 }
4229 
/// Default hook for target-specific DAG combining. This base implementation
/// performs no combines; the empty SDValue it returns signals to the DAG
/// combiner that the node was left unchanged. Targets override this to
/// implement their own node-level optimizations.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
4235 
4236 //===----------------------------------------------------------------------===//
4237 //  Inline Assembler Implementation Methods
4238 //===----------------------------------------------------------------------===//
4239 
4240 TargetLowering::ConstraintType
4241 TargetLowering::getConstraintType(StringRef Constraint) const {
4242   unsigned S = Constraint.size();
4243 
4244   if (S == 1) {
4245     switch (Constraint[0]) {
4246     default: break;
4247     case 'r':
4248       return C_RegisterClass;
4249     case 'm': // memory
4250     case 'o': // offsetable
4251     case 'V': // not offsetable
4252       return C_Memory;
4253     case 'n': // Simple Integer
4254     case 'E': // Floating Point Constant
4255     case 'F': // Floating Point Constant
4256       return C_Immediate;
4257     case 'i': // Simple Integer or Relocatable Constant
4258     case 's': // Relocatable Constant
4259     case 'p': // Address.
4260     case 'X': // Allow ANY value.
4261     case 'I': // Target registers.
4262     case 'J':
4263     case 'K':
4264     case 'L':
4265     case 'M':
4266     case 'N':
4267     case 'O':
4268     case 'P':
4269     case '<':
4270     case '>':
4271       return C_Other;
4272     }
4273   }
4274 
4275   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
4276     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
4277       return C_Memory;
4278     return C_Register;
4279   }
4280   return C_Unknown;
4281 }
4282 
4283 /// Try to replace an X constraint, which matches anything, with another that
4284 /// has more specific requirements based on the type of the corresponding
4285 /// operand.
4286 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4287   if (ConstraintVT.isInteger())
4288     return "r";
4289   if (ConstraintVT.isFloatingPoint())
4290     return "f"; // works for many targets
4291   return nullptr;
4292 }
4293 
/// Default hook for lowering an inline-asm output operand for the given
/// constraint. This base implementation does nothing: it returns an empty
/// SDValue (meaning "not handled") and leaves Chain and Flag untouched.
/// Targets override this to lower outputs for target-specific constraints.
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
    SelectionDAG &DAG) const {
  return SDValue();
}
4299 
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
///
/// The default implementation handles the target-independent letters:
///   'X' - any operand (labels in particular), 'i' - integer or relocatable
///   constant, 'n' - integer constant only, 's' - relocatable constant only.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Multi-letter constraints are target-specific; leave them for overrides.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':     // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock ||
        Op.getOpcode() == ISD::TargetBlockAddress) {
      Ops.push_back(Op);
      return;
    }
    LLVM_FALLTHROUGH;
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    GlobalAddressSDNode *GA;
    ConstantSDNode *C;
    BlockAddressSDNode *BA;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (1) {
      if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
        // Global addresses are relocatable, so valid for everything but 'n'.
        // Fold any offset accumulated from peeled ADD/SUB nodes.
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0),
                                                 Offset + GA->getOffset()));
        return;
      } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
                 ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        // i1 values are extended per the target's boolean-contents setting
        // so "true" is emitted as callers expect.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
                                      : ISD::SIGN_EXTEND;
        int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
                                                    : C->getSExtValue();
        Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
                                            SDLoc(C), MVT::i64));
        return;
      } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
                 ConstraintLetter != 'n') {
        // Block addresses are relocatable, like global addresses.
        Ops.push_back(DAG.getTargetBlockAddress(
            BA->getBlockAddress(), BA->getValueType(0),
            Offset + BA->getOffset(), BA->getTargetFlags()));
        return;
      } else {
        // Peel one ADD/SUB-with-constant layer, accumulating the constant
        // into Offset, and retry on the remaining operand.
        const unsigned OpCode = Op.getOpcode();
        if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
          if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
            Op = Op.getOperand(1);
          // Subtraction is not commutative.
          else if (OpCode == ISD::ADD &&
                   (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
            Op = Op.getOperand(0);
          else
            return;
          Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
          continue;
        }
      }
      // Not a form we can fold; Ops stays empty to signal "invalid".
      return;
    }
    break;
  }
  }
}
4380 
4381 std::pair<unsigned, const TargetRegisterClass *>
4382 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
4383                                              StringRef Constraint,
4384                                              MVT VT) const {
4385   if (Constraint.empty() || Constraint[0] != '{')
4386     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
4387   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
4388 
4389   // Remove the braces from around the name.
4390   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
4391 
4392   std::pair<unsigned, const TargetRegisterClass *> R =
4393       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
4394 
4395   // Figure out which register class contains this reg.
4396   for (const TargetRegisterClass *RC : RI->regclasses()) {
4397     // If none of the value types for this register class are valid, we
4398     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
4399     if (!isLegalRC(*RI, *RC))
4400       continue;
4401 
4402     for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
4403          I != E; ++I) {
4404       if (RegName.equals_lower(RI->getRegAsmName(*I))) {
4405         std::pair<unsigned, const TargetRegisterClass *> S =
4406             std::make_pair(*I, RC);
4407 
4408         // If this register class has the requested value type, return it,
4409         // otherwise keep searching and return the first class found
4410         // if no other is found which explicitly has the requested type.
4411         if (RI->isTypeLegalForClass(*RC, VT))
4412           return S;
4413         if (!R.second)
4414           R = S;
4415       }
4416     }
4417   }
4418 
4419   return R;
4420 }
4421 
4422 //===----------------------------------------------------------------------===//
4423 // Constraint Selection.
4424 
4425 /// Return true of this is an input operand that is a matching constraint like
4426 /// "4".
4427 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
4428   assert(!ConstraintCode.empty() && "No known constraint!");
4429   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
4430 }
4431 
4432 /// If this is an input matching constraint, this method returns the output
4433 /// operand it matches.
4434 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
4435   assert(!ConstraintCode.empty() && "No known constraint!");
4436   return atoi(ConstraintCode.c_str());
4437 }
4438 
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
///
/// This runs three phases:
///   1. Parse each constraint and compute its ConstraintVT from the call's
///      arguments / return type.
///   2. If multi-alternative constraints exist, pick the best alternative
///      (highest summed weight) and select it everywhere.
///   3. Validate tied (matching) output/input operand pairs.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple direct outputs come back as a struct; each output takes
        // the type of its struct element.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType());
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands are accessed through memory: the constraint
        // applies to the pointee type, not the pointer.
        llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        // Model pointers as integers of pointer width for their address
        // space.
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One operand cannot satisfy this alternative at all: reject it.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
           cIndex != eIndex; ++cIndex) {
        AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
        if (cInfo.Type == InlineAsm::isClobber)
          continue;
        cInfo.selectAlternative(bestMAIndex);
      }
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        // Differing VTs are still acceptable when both constraints resolve
        // to the same register class and agree on integer-ness.
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
4627 
4628 /// Return an integer indicating how general CT is.
4629 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
4630   switch (CT) {
4631   case TargetLowering::C_Immediate:
4632   case TargetLowering::C_Other:
4633   case TargetLowering::C_Unknown:
4634     return 0;
4635   case TargetLowering::C_Register:
4636     return 1;
4637   case TargetLowering::C_RegisterClass:
4638     return 2;
4639   case TargetLowering::C_Memory:
4640     return 3;
4641   }
4642   llvm_unreachable("Invalid constraint type");
4643 }
4644 
4645 /// Examine constraint type and operand type and determine a weight value.
4646 /// This object must already have been set up with the operand type
4647 /// and the current alternative constraint selected.
4648 TargetLowering::ConstraintWeight
4649   TargetLowering::getMultipleConstraintMatchWeight(
4650     AsmOperandInfo &info, int maIndex) const {
4651   InlineAsm::ConstraintCodeVector *rCodes;
4652   if (maIndex >= (int)info.multipleAlternatives.size())
4653     rCodes = &info.Codes;
4654   else
4655     rCodes = &info.multipleAlternatives[maIndex].Codes;
4656   ConstraintWeight BestWeight = CW_Invalid;
4657 
4658   // Loop over the options, keeping track of the most general one.
4659   for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
4660     ConstraintWeight weight =
4661       getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
4662     if (weight > BestWeight)
4663       BestWeight = weight;
4664   }
4665 
4666   return BestWeight;
4667 }
4668 
4669 /// Examine constraint type and operand type and determine a weight value.
4670 /// This object must already have been set up with the operand type
4671 /// and the current alternative constraint selected.
4672 TargetLowering::ConstraintWeight
4673   TargetLowering::getSingleConstraintMatchWeight(
4674     AsmOperandInfo &info, const char *constraint) const {
4675   ConstraintWeight weight = CW_Invalid;
4676   Value *CallOperandVal = info.CallOperandVal;
4677     // If we don't have a value, we can't do a match,
4678     // but allow it at the lowest weight.
4679   if (!CallOperandVal)
4680     return CW_Default;
4681   // Look at the constraint type.
4682   switch (*constraint) {
4683     case 'i': // immediate integer.
4684     case 'n': // immediate integer with a known value.
4685       if (isa<ConstantInt>(CallOperandVal))
4686         weight = CW_Constant;
4687       break;
4688     case 's': // non-explicit intregal immediate.
4689       if (isa<GlobalValue>(CallOperandVal))
4690         weight = CW_Constant;
4691       break;
4692     case 'E': // immediate float if host format.
4693     case 'F': // immediate float.
4694       if (isa<ConstantFP>(CallOperandVal))
4695         weight = CW_Constant;
4696       break;
4697     case '<': // memory operand with autodecrement.
4698     case '>': // memory operand with autoincrement.
4699     case 'm': // memory operand.
4700     case 'o': // offsettable memory operand
4701     case 'V': // non-offsettable memory operand
4702       weight = CW_Memory;
4703       break;
4704     case 'r': // general register.
4705     case 'g': // general register, memory operand or immediate integer.
4706               // note: Clang converts "g" to "imr".
4707       if (CallOperandVal->getType()->isIntegerTy())
4708         weight = CW_Register;
4709       break;
4710     case 'X': // any operand.
4711   default:
4712     weight = CW_Default;
4713     break;
4714   }
4715   return weight;
4716 }
4717 
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
///    Other         -> immediates and magic values
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it.  The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory.  Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it.  This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present.  This prefers
///     'm' over 'r', for example.
///
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
                             const TargetLowering &TLI,
                             SDValue Op, SelectionDAG *DAG) {
  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
  unsigned BestIdx = 0;
  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
  int BestGenerality = -1;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
    TargetLowering::ConstraintType CType =
      TLI.getConstraintType(OpInfo.Codes[i]);

    // Indirect 'other' or 'immediate' constraints are not allowed.
    if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
                               CType == TargetLowering::C_Register ||
                               CType == TargetLowering::C_RegisterClass))
      continue;

    // If this is an 'other' or 'immediate' constraint, see if the operand is
    // valid for it. For example, on X86 we might have an 'rI' constraint. If
    // the operand is an integer in the range [0..31] we want to use I (saving a
    // load of a register), otherwise we must use 'r'.
    if ((CType == TargetLowering::C_Other ||
         CType == TargetLowering::C_Immediate) && Op.getNode()) {
      assert(OpInfo.Codes[i].size() == 1 &&
             "Unhandled multi-letter 'other' constraint");
      // Probe: if lowering the operand for this constraint succeeds
      // (ResultOps non-empty), take it immediately per heuristic (1).
      std::vector<SDValue> ResultOps;
      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
                                       ResultOps, *DAG);
      if (!ResultOps.empty()) {
        BestType = CType;
        BestIdx = i;
        break;
      }
    }

    // Things with matching constraints can only be registers, per gcc
    // documentation.  This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    // This constraint letter is more general than the previous one, use it.
    int Generality = getConstraintGenerality(CType);
    if (Generality > BestGenerality) {
      BestType = CType;
      BestIdx = i;
      BestGenerality = Generality;
    }
  }

  // Record the winning code/type on the operand info.
  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
  OpInfo.ConstraintType = BestType;
}
4792 
4793 /// Determines the constraint code and constraint type to use for the specific
4794 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
4795 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
4796                                             SDValue Op,
4797                                             SelectionDAG *DAG) const {
4798   assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
4799 
4800   // Single-letter constraints ('r') are very common.
4801   if (OpInfo.Codes.size() == 1) {
4802     OpInfo.ConstraintCode = OpInfo.Codes[0];
4803     OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4804   } else {
4805     ChooseConstraint(OpInfo, *this, Op, DAG);
4806   }
4807 
4808   // 'X' matches anything.
4809   if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
4810     // Labels and constants are handled elsewhere ('X' is the only thing
4811     // that matches labels).  For Functions, the type here is the type of
4812     // the result, which is not what we want to look at; leave them alone.
4813     Value *v = OpInfo.CallOperandVal;
4814     if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
4815       OpInfo.CallOperandVal = v;
4816       return;
4817     }
4818 
4819     if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
4820       return;
4821 
4822     // Otherwise, try to resolve it to something we know about by looking at
4823     // the actual operand type.
4824     if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
4825       OpInfo.ConstraintCode = Repl;
4826       OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4827     }
4828   }
4829 }
4830 
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
///
/// An exact division d = 2^k * d' (d' odd) becomes an exact arithmetic shift
/// right by k followed by multiplication with the modular inverse of d'.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per divisor element: compute the trailing-zero shift amount and the
  // multiplicative inverse of the remaining odd factor.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is undefined; bail out.
    if (C->isNullValue())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      // Strip the power-of-two part; it is handled by an exact SRA below.
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  SDValue Shift, Factor;
  if (VT.isVector()) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else {
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  // Multiplying by the inverse of the odd factor yields the exact quotient.
  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
4891 
4892 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
4893                               SelectionDAG &DAG,
4894                               SmallVectorImpl<SDNode *> &Created) const {
4895   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4896   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4897   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
4898     return SDValue(N, 0); // Lower SDIV as SDIV
4899   return SDValue();
4900 }
4901 
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  // Per-element magic data: multiplier, numerator add/sub factor, post-mul
  // shift amount, and the mask applied to the sign-bit correction term.
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is undefined; give up.
    if (C->isNullValue())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    APInt::ms magics = Divisor.magic();
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.m = 0;
      magics.s = 0;
      // ShiftMask == 0 suppresses the sign-bit correction entirely.
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (VT.isVector()) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else {
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  SDValue Q;
  if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
                          : isOperationLegalOrCustom(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
  else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
                               : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
    // No MULHS: use the high half of a widening multiply instead.
    SDValue LoHi =
        DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
    Q = SDValue(LoHi.getNode(), 1);
  } else
    return SDValue(); // No mulhs or equivalent.
  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
5010 
5011 /// Given an ISD::UDIV node expressing a divide by constant,
5012 /// return a DAG expression to select that will generate the same value by
5013 /// multiplying by a magic number.
5014 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
5015 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
5016                                   bool IsAfterLegalization,
5017                                   SmallVectorImpl<SDNode *> &Created) const {
5018   SDLoc dl(N);
5019   EVT VT = N->getValueType(0);
5020   EVT SVT = VT.getScalarType();
5021   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5022   EVT ShSVT = ShVT.getScalarType();
5023   unsigned EltBits = VT.getScalarSizeInBits();
5024 
5025   // Check to see if we can do this.
5026   // FIXME: We should be more aggressive here.
5027   if (!isTypeLegal(VT))
5028     return SDValue();
5029 
5030   bool UseNPQ = false;
5031   SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5032 
5033   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
5034     if (C->isNullValue())
5035       return false;
5036     // FIXME: We should use a narrower constant when the upper
5037     // bits are known to be zero.
5038     APInt Divisor = C->getAPIntValue();
5039     APInt::mu magics = Divisor.magicu();
5040     unsigned PreShift = 0, PostShift = 0;
5041 
5042     // If the divisor is even, we can avoid using the expensive fixup by
5043     // shifting the divided value upfront.
5044     if (magics.a != 0 && !Divisor[0]) {
5045       PreShift = Divisor.countTrailingZeros();
5046       // Get magic number for the shifted divisor.
5047       magics = Divisor.lshr(PreShift).magicu(PreShift);
5048       assert(magics.a == 0 && "Should use cheap fixup now");
5049     }
5050 
5051     APInt Magic = magics.m;
5052 
5053     unsigned SelNPQ;
5054     if (magics.a == 0 || Divisor.isOneValue()) {
5055       assert(magics.s < Divisor.getBitWidth() &&
5056              "We shouldn't generate an undefined shift!");
5057       PostShift = magics.s;
5058       SelNPQ = false;
5059     } else {
5060       PostShift = magics.s - 1;
5061       SelNPQ = true;
5062     }
5063 
5064     PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
5065     MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
5066     NPQFactors.push_back(
5067         DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5068                                : APInt::getNullValue(EltBits),
5069                         dl, SVT));
5070     PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
5071     UseNPQ |= SelNPQ;
5072     return true;
5073   };
5074 
5075   SDValue N0 = N->getOperand(0);
5076   SDValue N1 = N->getOperand(1);
5077 
5078   // Collect the shifts/magic values from each element.
5079   if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
5080     return SDValue();
5081 
5082   SDValue PreShift, PostShift, MagicFactor, NPQFactor;
5083   if (VT.isVector()) {
5084     PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
5085     MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
5086     NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
5087     PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
5088   } else {
5089     PreShift = PreShifts[0];
5090     MagicFactor = MagicFactors[0];
5091     PostShift = PostShifts[0];
5092   }
5093 
5094   SDValue Q = N0;
5095   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
5096   Created.push_back(Q.getNode());
5097 
5098   // FIXME: We should support doing a MUL in a wider type.
5099   auto GetMULHU = [&](SDValue X, SDValue Y) {
5100     if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
5101                             : isOperationLegalOrCustom(ISD::MULHU, VT))
5102       return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
5103     if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
5104                             : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
5105       SDValue LoHi =
5106           DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
5107       return SDValue(LoHi.getNode(), 1);
5108     }
5109     return SDValue(); // No mulhu or equivalent
5110   };
5111 
5112   // Multiply the numerator (operand 0) by the magic value.
5113   Q = GetMULHU(Q, MagicFactor);
5114   if (!Q)
5115     return SDValue();
5116 
5117   Created.push_back(Q.getNode());
5118 
5119   if (UseNPQ) {
5120     SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
5121     Created.push_back(NPQ.getNode());
5122 
5123     // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5124     // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
5125     if (VT.isVector())
5126       NPQ = GetMULHU(NPQ, NPQFactor);
5127     else
5128       NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
5129 
5130     Created.push_back(NPQ.getNode());
5131 
5132     Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
5133     Created.push_back(Q.getNode());
5134   }
5135 
5136   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
5137   Created.push_back(Q.getNode());
5138 
5139   SDValue One = DAG.getConstant(1, dl, VT);
5140   SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
5141   return DAG.getSelect(dl, VT, IsOne, N0, Q);
5142 }
5143 
5144 /// If all values in Values that *don't* match the predicate are same 'splat'
5145 /// value, then replace all values with that splat value.
5146 /// Else, if AlternativeReplacement was provided, then replace all values that
5147 /// do match predicate with AlternativeReplacement value.
5148 static void
5149 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
5150                           std::function<bool(SDValue)> Predicate,
5151                           SDValue AlternativeReplacement = SDValue()) {
5152   SDValue Replacement;
5153   // Is there a value for which the Predicate does *NOT* match? What is it?
5154   auto SplatValue = llvm::find_if_not(Values, Predicate);
5155   if (SplatValue != Values.end()) {
5156     // Does Values consist only of SplatValue's and values matching Predicate?
5157     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
5158           return Value == *SplatValue || Predicate(Value);
5159         })) // Then we shall replace values matching predicate with SplatValue.
5160       Replacement = *SplatValue;
5161   }
5162   if (!Replacement) {
5163     // Oops, we did not find the "baseline" splat value.
5164     if (!AlternativeReplacement)
5165       return; // Nothing to do.
5166     // Let's replace with provided value then.
5167     Replacement = AlternativeReplacement;
5168   }
5169   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
5170 }
5171 
5172 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
5173 /// where the divisor is constant and the comparison target is zero,
5174 /// return a DAG expression that will generate the same comparison result
5175 /// using only multiplications, additions and shifts/rotations.
5176 /// Ref: "Hacker's Delight" 10-17.
5177 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
5178                                         SDValue CompTargetNode,
5179                                         ISD::CondCode Cond,
5180                                         DAGCombinerInfo &DCI,
5181                                         const SDLoc &DL) const {
5182   SmallVector<SDNode *, 5> Built;
5183   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5184                                          DCI, DL, Built)) {
5185     for (SDNode *N : Built)
5186       DCI.AddToWorklist(N);
5187     return Folded;
5188   }
5189 
5190   return SDValue();
5191 }
5192 
// Worker for buildUREMEqFold: performs the actual fold and records every
// newly-created node in \p Created, but does not touch the combiner worklist.
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // Per-lane analysis results, accumulated by BuildUREMPattern below.
  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;

  // Analyze one lane (divisor CDiv, comparison constant CCmp): append the
  // lane's P, K and Q constants, and update the flags above. Returns false
  // if the lane cannot be handled at all (division by zero).
  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isNullValue())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isNullValue();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isNullValue())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOneValue();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(R))
      Q -= 1;

    // K is later materialized as an unsigned ShSVT constant; -1 (below) must
    // remain distinguishable from every real shift amount.
    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K amount to bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, KVal, QVal;
  if (VT.isVector()) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // Comparing with a non-zero constant: fold urem N, D == C into
  // urem (N - C), D == 0 by subtracting C from the LHS first.
  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!isOperationLegalOrCustom(ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    SDNodeFlags Flags;
    Flags.setExact(true);
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
    Created.push_back(Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  // D and CompTargetNode are both constants, so this setcc const-folds into
  // a per-lane mask of the inverted lanes.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
  Created.push_back(TautologicalInvertedChannels.getNode());

  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
                                              DL, SETCCVT, SETCCVT);
    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
                       Replacement, NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
                       TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
5405 
5406 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
5407 /// where the divisor is constant and the comparison target is zero,
5408 /// return a DAG expression that will generate the same comparison result
5409 /// using only multiplications, additions and shifts/rotations.
5410 /// Ref: "Hacker's Delight" 10-17.
5411 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
5412                                         SDValue CompTargetNode,
5413                                         ISD::CondCode Cond,
5414                                         DAGCombinerInfo &DCI,
5415                                         const SDLoc &DL) const {
5416   SmallVector<SDNode *, 7> Built;
5417   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5418                                          DCI, DL, Built)) {
5419     assert(Built.size() <= 7 && "Max size prediction failed.");
5420     for (SDNode *N : Built)
5421       DCI.AddToWorklist(N);
5422     return Folded;
5423   }
5424 
5425   return SDValue();
5426 }
5427 
// Worker for buildSREMEqFold: performs the actual fold and records every
// newly-created node in \p Created, but does not touch the combiner worklist.
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isNullValue())
    return SDValue();

  // Per-lane analysis results, accumulated by BuildSREMPattern below.
  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  // Analyze one lane's divisor: append the lane's P, A, K and Q constants,
  // and update the flags above. Returns false if the lane cannot be handled
  // at all (division by zero).
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isNullValue())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`

    // Note: INT_MIN negates to itself, so it stays the min signed value and
    // gets the special handling after the main fold below.
    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOneValue();
    AllDivisorsAreOnes &= D.isOneValue();

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOneValue();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    // A and K are later materialized as unsigned constants; -1 (below) must
    // remain distinguishable from every real value.
    assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOneValue()) {
      // Set P, A and K to bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, AVal, KVal, QVal;
  if (VT.isVector()) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else {
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();

    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    SDNodeFlags Flags;
    Flags.setExact(true);
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // NOTE(review): ISD::SETEQ and Cond are ISD::CondCode values, not node
  // opcodes; passing them to isOperationLegalOrCustom looks suspect —
  // verify this is intentional (an ISD::SETCC query may be what was meant).
  if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isOperationLegalOrCustom(Cond, VT) ||
      !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  // NOTE(review): the VSELECT result type here is VT while both selected
  // operands are SETCCVT-typed setcc results — this presumes SETCCVT == VT;
  // confirm (cf. the SETCCVT-typed VSELECT in prepareUREMEqFold).
  SDValue Blended =
      DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);

  return Blended;
}
5668 
5669 bool TargetLowering::
5670 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
5671   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
5672     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
5673                                 "be a constant integer");
5674     return true;
5675   }
5676 
5677   return false;
5678 }
5679 
5680 SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
5681                                              bool LegalOps, bool OptForSize,
5682                                              NegatibleCost &Cost,
5683                                              unsigned Depth) const {
5684   // fneg is removable even if it has multiple uses.
5685   if (Op.getOpcode() == ISD::FNEG) {
5686     Cost = NegatibleCost::Cheaper;
5687     return Op.getOperand(0);
5688   }
5689 
5690   // Don't recurse exponentially.
5691   if (Depth > SelectionDAG::MaxRecursionDepth)
5692     return SDValue();
5693 
5694   // Pre-increment recursion depth for use in recursive calls.
5695   ++Depth;
5696   const SDNodeFlags Flags = Op->getFlags();
5697   const TargetOptions &Options = DAG.getTarget().Options;
5698   EVT VT = Op.getValueType();
5699   unsigned Opcode = Op.getOpcode();
5700 
5701   // Don't allow anything with multiple uses unless we know it is free.
5702   if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
5703     bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
5704                         isFPExtFree(VT, Op.getOperand(0).getValueType());
5705     if (!IsFreeExtend)
5706       return SDValue();
5707   }
5708 
5709   SDLoc DL(Op);
5710 
5711   switch (Opcode) {
5712   case ISD::ConstantFP: {
5713     // Don't invert constant FP values after legalization unless the target says
5714     // the negated constant is legal.
5715     bool IsOpLegal =
5716         isOperationLegal(ISD::ConstantFP, VT) ||
5717         isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
5718                      OptForSize);
5719 
5720     if (LegalOps && !IsOpLegal)
5721       break;
5722 
5723     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
5724     V.changeSign();
5725     SDValue CFP = DAG.getConstantFP(V, DL, VT);
5726 
5727     // If we already have the use of the negated floating constant, it is free
5728     // to negate it even it has multiple uses.
5729     if (!Op.hasOneUse() && CFP.use_empty())
5730       break;
5731     Cost = NegatibleCost::Neutral;
5732     return CFP;
5733   }
5734   case ISD::BUILD_VECTOR: {
5735     // Only permit BUILD_VECTOR of constants.
5736     if (llvm::any_of(Op->op_values(), [&](SDValue N) {
5737           return !N.isUndef() && !isa<ConstantFPSDNode>(N);
5738         }))
5739       break;
5740 
5741     bool IsOpLegal =
5742         (isOperationLegal(ISD::ConstantFP, VT) &&
5743          isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
5744         llvm::all_of(Op->op_values(), [&](SDValue N) {
5745           return N.isUndef() ||
5746                  isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
5747                               OptForSize);
5748         });
5749 
5750     if (LegalOps && !IsOpLegal)
5751       break;
5752 
5753     SmallVector<SDValue, 4> Ops;
5754     for (SDValue C : Op->op_values()) {
5755       if (C.isUndef()) {
5756         Ops.push_back(C);
5757         continue;
5758       }
5759       APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
5760       V.changeSign();
5761       Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
5762     }
5763     Cost = NegatibleCost::Neutral;
5764     return DAG.getBuildVector(VT, DL, Ops);
5765   }
5766   case ISD::FADD: {
5767     if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
5768       break;
5769 
5770     // After operation legalization, it might not be legal to create new FSUBs.
5771     if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
5772       break;
5773     SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
5774 
5775     // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
5776     NegatibleCost CostX = NegatibleCost::Expensive;
5777     SDValue NegX =
5778         getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
5779     // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
5780     NegatibleCost CostY = NegatibleCost::Expensive;
5781     SDValue NegY =
5782         getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
5783 
5784     // Negate the X if its cost is less or equal than Y.
5785     if (NegX && (CostX <= CostY)) {
5786       Cost = CostX;
5787       return DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
5788     }
5789 
5790     // Negate the Y if it is not expensive.
5791     if (NegY) {
5792       Cost = CostY;
5793       return DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
5794     }
5795     break;
5796   }
5797   case ISD::FSUB: {
5798     // We can't turn -(A-B) into B-A when we honor signed zeros.
5799     if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
5800       break;
5801 
5802     SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
5803     // fold (fneg (fsub 0, Y)) -> Y
5804     if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
5805       if (C->isZero()) {
5806         Cost = NegatibleCost::Cheaper;
5807         return Y;
5808       }
5809 
5810     // fold (fneg (fsub X, Y)) -> (fsub Y, X)
5811     Cost = NegatibleCost::Neutral;
5812     return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
5813   }
5814   case ISD::FMUL:
5815   case ISD::FDIV: {
5816     SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
5817 
5818     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
5819     NegatibleCost CostX = NegatibleCost::Expensive;
5820     SDValue NegX =
5821         getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
5822     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
5823     NegatibleCost CostY = NegatibleCost::Expensive;
5824     SDValue NegY =
5825         getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
5826 
5827     // Negate the X if its cost is less or equal than Y.
5828     if (NegX && (CostX <= CostY)) {
5829       Cost = CostX;
5830       return DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
5831     }
5832 
5833     // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
5834     if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
5835       if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
5836         break;
5837 
5838     // Negate the Y if it is not expensive.
5839     if (NegY) {
5840       Cost = CostY;
5841       return DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
5842     }
5843     break;
5844   }
5845   case ISD::FMA:
5846   case ISD::FMAD: {
5847     if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
5848       break;
5849 
5850     SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
5851     NegatibleCost CostZ = NegatibleCost::Expensive;
5852     SDValue NegZ =
5853         getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
5854     // Give up if fail to negate the Z.
5855     if (!NegZ)
5856       break;
5857 
5858     // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
5859     NegatibleCost CostX = NegatibleCost::Expensive;
5860     SDValue NegX =
5861         getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
5862     // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
5863     NegatibleCost CostY = NegatibleCost::Expensive;
5864     SDValue NegY =
5865         getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
5866 
5867     // Negate the X if its cost is less or equal than Y.
5868     if (NegX && (CostX <= CostY)) {
5869       Cost = std::min(CostX, CostZ);
5870       return DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
5871     }
5872 
5873     // Negate the Y if it is not expensive.
5874     if (NegY) {
5875       Cost = std::min(CostY, CostZ);
5876       return DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
5877     }
5878     break;
5879   }
5880 
5881   case ISD::FP_EXTEND:
5882   case ISD::FSIN:
5883     if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
5884                                             OptForSize, Cost, Depth))
5885       return DAG.getNode(Opcode, DL, VT, NegV);
5886     break;
5887   case ISD::FP_ROUND:
5888     if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
5889                                             OptForSize, Cost, Depth))
5890       return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
5891     break;
5892   }
5893 
5894   return SDValue();
5895 }
5896 
5897 //===----------------------------------------------------------------------===//
5898 // Legalization Utilities
5899 //===----------------------------------------------------------------------===//
5900 
/// Expand a MUL, UMUL_LOHI or SMUL_LOHI of type VT into operations on the
/// half-width type HiLoVT. On success, the parts of the product are appended
/// to Result as HiLoVT values: two (lo, hi) for ISD::MUL, four for the LOHI
/// opcodes (lo/hi of the low product word, then lo/hi of the high word).
/// LL/LH/RL/RH may optionally supply precomputed low/high halves of LHS and
/// RHS; they must be either all set or all null.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // MulExpansionKind::Always lets us assume the half-width multiplies are
  // usable even when they are not legal/custom for HiLoVT.
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  // Without any usable half-width widening multiply there is nothing to
  // expand into.
  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
  // Sign-bit counts are used below to detect operands that are effectively
  // only InnerBitSize wide (sign-extended).
  unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
  unsigned RHSSB = DAG.ComputeNumSignBits(RHS);

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Emit one half-width widening multiply of L and R, producing the low and
  // high HiLoVT halves. Prefers the fused *MUL_LOHI node, then MUL + MULH*.
  // Returns false if neither form is available for the requested signedness.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      // The LOHI node has two results; result 1 is the high half.
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // If the caller did not provide the operand halves, compute the low halves
  // by truncation when possible.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  // Fast path: if both operands fit in the inner type (high bits known zero),
  // a single unsigned half-width multiply produces the whole product.
  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // LOHI callers expect four parts; the upper word is zero here.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  // Fast path: both operands are sign-extended inner values, so one signed
  // half-width multiply suffices for a plain MUL.
  if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
      RHSSB > InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
    // FIXME getShiftAmountTy does not always return a sensible result when VT
    // is an illegal type, and so the type may be too small to fit the shift
    // amount. Override it with i32. The shift will have to be legalized.
    ShiftAmountTy = MVT::i32;
  }
  SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);

  // If the caller did not provide high halves, extract them via shift+trunc.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // General case: schoolbook multiplication on the four half-width pieces.
  // LL * RL gives the lowest part of the product directly.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // Only the low VT bits are needed: hi += LL*RH + LH*RL (all mod 2^Inner),
    // so plain half-width MULs of the cross terms suffice.
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  // Glue two HiLoVT halves back into one full-width VT value.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  // Running sum of the middle partial products, kept at full VT width.
  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Adding the second cross term can carry out; use the glued ADDC/ADDE pair
  // when the target has it, otherwise ADDCARRY with a boolean carry value.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  // Second result part: low half of the accumulated middle sum.
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // Top partial product: LH * RH; signed when expanding SMUL_LOHI.
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  // Propagate the carry from the middle sum into the top product's high half.
  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Correct the unsigned product to a signed one: when an operand's high
    // half is negative, subtract the other operand's (zero-extended) low half
    // from the upper bits of the result.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  // Last two result parts: low and high halves of the top word.
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
6083 
6084 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
6085                                SelectionDAG &DAG, MulExpansionKind Kind,
6086                                SDValue LL, SDValue LH, SDValue RL,
6087                                SDValue RH) const {
6088   SmallVector<SDValue, 2> Result;
6089   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
6090                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
6091                            DAG, Kind, LL, LH, RL, RH);
6092   if (Ok) {
6093     assert(Result.size() == 2);
6094     Lo = Result[0];
6095     Hi = Result[1];
6096   }
6097   return Ok;
6098 }
6099 
6100 bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
6101                                        SelectionDAG &DAG) const {
6102   EVT VT = Node->getValueType(0);
6103 
6104   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
6105                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
6106                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
6107                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
6108     return false;
6109 
6110   // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6111   // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
6112   SDValue X = Node->getOperand(0);
6113   SDValue Y = Node->getOperand(1);
6114   SDValue Z = Node->getOperand(2);
6115 
6116   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6117   bool IsFSHL = Node->getOpcode() == ISD::FSHL;
6118   SDLoc DL(SDValue(Node, 0));
6119 
6120   EVT ShVT = Z.getValueType();
6121   SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
6122   SDValue ShAmt, InvShAmt;
6123   if (isPowerOf2_32(EltSizeInBits)) {
6124     // Z % BW -> Z & (BW - 1)
6125     ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
6126     // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6127     InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
6128   } else {
6129     SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
6130     ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
6131     InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
6132   }
6133 
6134   SDValue One = DAG.getConstant(1, DL, ShVT);
6135   SDValue ShX, ShY;
6136   if (IsFSHL) {
6137     ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
6138     SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
6139     ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
6140   } else {
6141     SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
6142     ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
6143     ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
6144   }
6145   Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
6146   return true;
6147 }
6148 
6149 // TODO: Merge with expandFunnelShift.
6150 bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
6151                                SelectionDAG &DAG) const {
6152   EVT VT = Node->getValueType(0);
6153   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6154   bool IsLeft = Node->getOpcode() == ISD::ROTL;
6155   SDValue Op0 = Node->getOperand(0);
6156   SDValue Op1 = Node->getOperand(1);
6157   SDLoc DL(SDValue(Node, 0));
6158 
6159   EVT ShVT = Op1.getValueType();
6160   SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
6161 
6162   // If a rotate in the other direction is legal, use it.
6163   unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
6164   if (isOperationLegal(RevRot, VT)) {
6165     SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
6166     Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
6167     return true;
6168   }
6169 
6170   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
6171                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
6172                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
6173                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
6174                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
6175     return false;
6176 
6177   // Otherwise,
6178   //   (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
6179   //   (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
6180   //
6181   assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
6182          "Expecting the type bitwidth to be a power of 2");
6183   unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
6184   unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
6185   SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
6186   SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
6187   SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
6188   SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
6189   Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
6190                        DAG.getNode(HsOpc, DL, VT, Op0, And1));
6191   return true;
6192 }
6193 
/// Expand FP_TO_SINT by decoding the IEEE-754 bit pattern of the source with
/// integer arithmetic. Currently restricted to f32 -> i64 (see FIXME below);
/// strict-FP nodes are rejected because the expansion would drop mandated
/// traps.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  // Strict nodes carry their chain in operand 0; the FP value follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single precision layout: 1 sign bit, 8 exponent bits (bias 127),
  // 23 mantissa bits with an implicit leading one for normal numbers.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as its raw integer bits.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent: ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Arithmetic-shift the isolated sign bit across the whole word: Sign is
  // 0 for positive inputs and all-ones (-1) for negative inputs.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Significand with the implicit leading one (bit 23) restored.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand by the exponent: shift left by (Exponent - 23)
  // when the exponent exceeds the mantissa width, otherwise shift right by
  // (23 - Exponent).
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Conditionally negate via (R ^ Sign) - Sign (Sign is 0 or -1).
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |Src| < 1, so the result is 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
6264 
/// Expand FP_TO_UINT in terms of FP_TO_SINT: either directly, when the float
/// type cannot even represent the destination's sign-mask value, or by
/// splitting the range at 2^(DstBits-1) and compensating afterwards. Handles
/// both the regular and strict-FP forms; Chain is updated for strict nodes.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // Strict nodes carry their chain in operand 0; the FP value follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Cst now holds the sign-mask value rounded into the source float type;
  // Sel is true when Src is below that threshold (FP_TO_SINT is then exact).
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    // Use a signaling comparison in strict mode so invalid inputs still trap,
    // and thread the resulting chain through.
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    // Widen/narrow the boolean to the destination's setcc type before using
    // it to select an integer offset.
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      // Keep the chain ordered: FSUB feeds FP_TO_SINT feeds the output chain.
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    // Adding the sign mask is done as XOR: the biased result has its top
    // bit clear, so XOR with the sign mask is the same as adding it.
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
6361 
/// Expand UINT_TO_FP for i64 -> f64 using the __floatundidf technique:
/// embed the low and high 32-bit halves of the source into the mantissas of
/// exactly-representable doubles, then combine them so that rounding happens
/// exactly once, in the final FADD. Handles both regular and strict-FP forms.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  // Strict nodes carry their chain in operand 0; the integer value follows.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt. This implementation has the advantage
  // of performing rounding correctly, both in the default rounding mode
  // and in all alternate rounding modes.
  // TwoP52/TwoP84 are the raw f64 bit patterns of 2^52 and 2^84: OR'ing a
  // 32-bit value into their (zero) mantissa bits yields the exact doubles
  // 2^52 + Lo and 2^84 + Hi*2^32 respectively. TwoP84PlusTwoP52 is the f64
  // value 2^84 + 2^52, subtracted to cancel both embedded biases.
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

  // Split the source into 32-bit halves and embed each into a double.
  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  // (HiFlt - (2^84 + 2^52)) + LoFlt == Hi*2^32 - 2^52 + (2^52 + Lo)
  //                                 == Hi*2^32 + Lo; only the final FADD
  // can round. Strict mode threads the chain through both FP ops.
  if (Node->isStrictFPOpcode()) {
    SDValue HiSub =
        DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
                    {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
    Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
                         {HiSub.getValue(1), LoFlt, HiSub});
    Chain = Result.getValue(1);
  } else {
    SDValue HiSub =
        DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
    Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  }
  return true;
}
6415 
6416 SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
6417                                               SelectionDAG &DAG) const {
6418   SDLoc dl(Node);
6419   unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
6420     ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
6421   EVT VT = Node->getValueType(0);
6422   if (isOperationLegalOrCustom(NewOp, VT)) {
6423     SDValue Quiet0 = Node->getOperand(0);
6424     SDValue Quiet1 = Node->getOperand(1);
6425 
6426     if (!Node->getFlags().hasNoNaNs()) {
6427       // Insert canonicalizes if it's possible we need to quiet to get correct
6428       // sNaN behavior.
6429       if (!DAG.isKnownNeverSNaN(Quiet0)) {
6430         Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
6431                              Node->getFlags());
6432       }
6433       if (!DAG.isKnownNeverSNaN(Quiet1)) {
6434         Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
6435                              Node->getFlags());
6436       }
6437     }
6438 
6439     return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
6440   }
6441 
6442   // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
6443   // instead if there are no NaNs.
6444   if (Node->getFlags().hasNoNaNs()) {
6445     unsigned IEEE2018Op =
6446         Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
6447     if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
6448       return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
6449                          Node->getOperand(1), Node->getFlags());
6450     }
6451   }
6452 
6453   // If none of the above worked, but there are no NaNs, then expand to
6454   // a compare/select sequence.  This is required for correctness since
6455   // InstCombine might have canonicalized a fcmp+select sequence to a
6456   // FMINNUM/FMAXNUM node.  If we were to fall through to the default
6457   // expansion to libcall, we might introduce a link-time dependency
6458   // on libm into a file that originally did not have one.
6459   if (Node->getFlags().hasNoNaNs()) {
6460     ISD::CondCode Pred =
6461         Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
6462     SDValue Op1 = Node->getOperand(0);
6463     SDValue Op2 = Node->getOperand(1);
6464     SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
6465     // Copy FMF flags, but always set the no-signed-zeros flag
6466     // as this is implied by the FMINNUM/FMAXNUM semantics.
6467     SDNodeFlags Flags = Node->getFlags();
6468     Flags.setNoSignedZeros(true);
6469     SelCC->setFlags(Flags);
6470     return SelCC;
6471   }
6472 
6473   return SDValue();
6474 }
6475 
6476 bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
6477                                  SelectionDAG &DAG) const {
6478   SDLoc dl(Node);
6479   EVT VT = Node->getValueType(0);
6480   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6481   SDValue Op = Node->getOperand(0);
6482   unsigned Len = VT.getScalarSizeInBits();
6483   assert(VT.isInteger() && "CTPOP not implemented for this type.");
6484 
6485   // TODO: Add support for irregular type lengths.
6486   if (!(Len <= 128 && Len % 8 == 0))
6487     return false;
6488 
6489   // Only expand vector types if we have the appropriate vector bit operations.
6490   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
6491                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
6492                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
6493                         (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
6494                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
6495     return false;
6496 
6497   // This is the "best" algorithm from
6498   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
6499   SDValue Mask55 =
6500       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
6501   SDValue Mask33 =
6502       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
6503   SDValue Mask0F =
6504       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
6505   SDValue Mask01 =
6506       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
6507 
6508   // v = v - ((v >> 1) & 0x55555555...)
6509   Op = DAG.getNode(ISD::SUB, dl, VT, Op,
6510                    DAG.getNode(ISD::AND, dl, VT,
6511                                DAG.getNode(ISD::SRL, dl, VT, Op,
6512                                            DAG.getConstant(1, dl, ShVT)),
6513                                Mask55));
6514   // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
6515   Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
6516                    DAG.getNode(ISD::AND, dl, VT,
6517                                DAG.getNode(ISD::SRL, dl, VT, Op,
6518                                            DAG.getConstant(2, dl, ShVT)),
6519                                Mask33));
6520   // v = (v + (v >> 4)) & 0x0F0F0F0F...
6521   Op = DAG.getNode(ISD::AND, dl, VT,
6522                    DAG.getNode(ISD::ADD, dl, VT, Op,
6523                                DAG.getNode(ISD::SRL, dl, VT, Op,
6524                                            DAG.getConstant(4, dl, ShVT))),
6525                    Mask0F);
6526   // v = (v * 0x01010101...) >> (Len - 8)
6527   if (Len > 8)
6528     Op =
6529         DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
6530                     DAG.getConstant(Len - 8, dl, ShVT));
6531 
6532   Result = Op;
6533   return true;
6534 }
6535 
6536 bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
6537                                 SelectionDAG &DAG) const {
6538   SDLoc dl(Node);
6539   EVT VT = Node->getValueType(0);
6540   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6541   SDValue Op = Node->getOperand(0);
6542   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6543 
6544   // If the non-ZERO_UNDEF version is supported we can use that instead.
6545   if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
6546       isOperationLegalOrCustom(ISD::CTLZ, VT)) {
6547     Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
6548     return true;
6549   }
6550 
6551   // If the ZERO_UNDEF version is supported use that and handle the zero case.
6552   if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
6553     EVT SetCCVT =
6554         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6555     SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
6556     SDValue Zero = DAG.getConstant(0, dl, VT);
6557     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6558     Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6559                          DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
6560     return true;
6561   }
6562 
6563   // Only expand vector types if we have the appropriate vector bit operations.
6564   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6565                         !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
6566                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
6567                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
6568     return false;
6569 
6570   // for now, we do this:
6571   // x = x | (x >> 1);
6572   // x = x | (x >> 2);
6573   // ...
6574   // x = x | (x >>16);
6575   // x = x | (x >>32); // for 64-bit input
6576   // return popcount(~x);
6577   //
6578   // Ref: "Hacker's Delight" by Henry Warren
6579   for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
6580     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
6581     Op = DAG.getNode(ISD::OR, dl, VT, Op,
6582                      DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
6583   }
6584   Op = DAG.getNOT(dl, Op, VT);
6585   Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
6586   return true;
6587 }
6588 
6589 bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
6590                                 SelectionDAG &DAG) const {
6591   SDLoc dl(Node);
6592   EVT VT = Node->getValueType(0);
6593   SDValue Op = Node->getOperand(0);
6594   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6595 
6596   // If the non-ZERO_UNDEF version is supported we can use that instead.
6597   if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
6598       isOperationLegalOrCustom(ISD::CTTZ, VT)) {
6599     Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
6600     return true;
6601   }
6602 
6603   // If the ZERO_UNDEF version is supported use that and handle the zero case.
6604   if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
6605     EVT SetCCVT =
6606         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6607     SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
6608     SDValue Zero = DAG.getConstant(0, dl, VT);
6609     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6610     Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6611                          DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
6612     return true;
6613   }
6614 
6615   // Only expand vector types if we have the appropriate vector bit operations.
6616   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6617                         (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
6618                          !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
6619                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
6620                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
6621                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6622     return false;
6623 
6624   // for now, we use: { return popcount(~x & (x - 1)); }
6625   // unless the target has ctlz but not ctpop, in which case we use:
6626   // { return 32 - nlz(~x & (x-1)); }
6627   // Ref: "Hacker's Delight" by Henry Warren
6628   SDValue Tmp = DAG.getNode(
6629       ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
6630       DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
6631 
6632   // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
6633   if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
6634     Result =
6635         DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
6636                     DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
6637     return true;
6638   }
6639 
6640   Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
6641   return true;
6642 }
6643 
6644 bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
6645                                SelectionDAG &DAG) const {
6646   SDLoc dl(N);
6647   EVT VT = N->getValueType(0);
6648   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6649   SDValue Op = N->getOperand(0);
6650 
6651   // Only expand vector types if we have the appropriate vector operations.
6652   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
6653                         !isOperationLegalOrCustom(ISD::ADD, VT) ||
6654                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6655     return false;
6656 
6657   SDValue Shift =
6658       DAG.getNode(ISD::SRA, dl, VT, Op,
6659                   DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
6660   SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
6661   Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
6662   return true;
6663 }
6664 
// Split a vector load into per-element values. Non-byte-sized element types
// are loaded as one wide integer and each element is extracted with
// shift+mask; byte-sized elements get one scalar (ext)load each. Returns the
// {result value, new chain} pair for the caller to merge into the DAG.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getAlignment(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 sits in the high bits of the
      // integer, so mirror the shift index.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
                                     LoadVT, SL, /*LegalTypes=*/false);
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      // Re-apply the load's requested extension (sext/zext/aext) to each
      // truncated element.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: emit one scalar load per element at consecutive
  // byte offsets and rebuild the vector.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // Merge the per-element load chains; the loads are independent of each
  // other, so ordering among them does not matter.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
6752 
// Split a vector store into per-element stores. Non-byte-sized element
// types are packed into a single integer (shift+or) and stored once;
// byte-sized elements get one truncating scalar store each. Returns the
// resulting chain.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    // OR each truncated element into its bit position within the integer.
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 belongs in the high bits of the
      // integer, so mirror the shift index.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getAlignment(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
        ST->getMemOperand()->getFlags(), ST->getAAInfo());

    Stores.push_back(Store);
  }

  // Merge the per-element store chains into a single chain result.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
6825 
// Lower a load whose alignment the target cannot handle directly. FP/vector
// loads are either done as a same-sized integer load plus bitcast, or copied
// through an aligned stack slot; integer loads are split into two
// half-width loads joined with shift+or. Returns {value, chain}.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      // An extending FP/vector load needs the extension re-applied after
      // the bitcast.
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-sized pieces needed, rounding up.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       MinAlign(LD->getAlignment(), Offset),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  unsigned Alignment = LD->getAlignment();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    // Little endian: low half at the base address, high half after it.
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  } else {
    // Big endian: high half comes first in memory.
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  }

  // aggregate the two parts
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
6977 
// Lower a store whose alignment the target cannot handle directly. FP/vector
// stores are either bitcast to a same-sized integer and stored, or copied
// through an aligned stack slot; integer stores are split into two
// half-width truncating stores. Returns the resulting chain.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  int Alignment = ST->getAlignment();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-sized pieces needed, rounding up.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    MinAlign(ST->getAlignment(), Offset),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          MinAlign(ST->getAlignment(), Offset),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  int NumBits = NewStoredVT.getSizeInBits();
  int IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount = DAG.getConstant(
      NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
  SDValue Lo = Val;
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts, swapping which half goes first in memory based on
  // the target's endianness.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
  Alignment = MinAlign(Alignment, IncrementSize);
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
7103 
7104 SDValue
7105 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
7106                                        const SDLoc &DL, EVT DataVT,
7107                                        SelectionDAG &DAG,
7108                                        bool IsCompressedMemory) const {
7109   SDValue Increment;
7110   EVT AddrVT = Addr.getValueType();
7111   EVT MaskVT = Mask.getValueType();
7112   assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
7113          "Incompatible types of Data and Mask");
7114   if (IsCompressedMemory) {
7115     // Incrementing the pointer according to number of '1's in the mask.
7116     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
7117     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
7118     if (MaskIntVT.getSizeInBits() < 32) {
7119       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
7120       MaskIntVT = MVT::i32;
7121     }
7122 
7123     // Count '1's with POPCNT.
7124     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
7125     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
7126     // Scale is an element size in bytes.
7127     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
7128                                     AddrVT);
7129     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
7130   } else
7131     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
7132 
7133   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
7134 }
7135 
7136 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
7137                                        SDValue Idx,
7138                                        EVT VecVT,
7139                                        const SDLoc &dl) {
7140   if (isa<ConstantSDNode>(Idx))
7141     return Idx;
7142 
7143   EVT IdxVT = Idx.getValueType();
7144   unsigned NElts = VecVT.getVectorNumElements();
7145   if (isPowerOf2_32(NElts)) {
7146     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
7147                                      Log2_32(NElts));
7148     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
7149                        DAG.getConstant(Imm, dl, IdxVT));
7150   }
7151 
7152   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
7153                      DAG.getConstant(NElts - 1, dl, IdxVT));
7154 }
7155 
7156 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
7157                                                 SDValue VecPtr, EVT VecVT,
7158                                                 SDValue Index) const {
7159   SDLoc dl(Index);
7160   // Make sure the index type is big enough to compute in.
7161   Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
7162 
7163   EVT EltVT = VecVT.getVectorElementType();
7164 
7165   // Calculate the element offset and add it to the pointer.
7166   unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
7167   assert(EltSize * 8 == EltVT.getSizeInBits() &&
7168          "Converting bits to bytes lost precision");
7169 
7170   Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
7171 
7172   EVT IdxVT = Index.getValueType();
7173 
7174   Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
7175                       DAG.getConstant(EltSize, dl, IdxVT));
7176   return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
7177 }
7178 
7179 //===----------------------------------------------------------------------===//
7180 // Implementation of Emulated TLS Model
7181 //===----------------------------------------------------------------------===//
7182 
// Lower a TLS global address under the emulated-TLS model by emitting a
// libcall. Returns the call's result value (the address of the TLS object).
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  // Look up the "__emutls_v.<name>" control variable (created elsewhere by
  // the emulated-TLS lowering) and pass its address as the sole argument.
  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Build and lower the libcall off the entry-node chain.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
7219 
7220 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
7221                                                 SelectionDAG &DAG) const {
7222   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
7223   if (!isCtlzFast())
7224     return SDValue();
7225   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7226   SDLoc dl(Op);
7227   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7228     if (C->isNullValue() && CC == ISD::SETEQ) {
7229       EVT VT = Op.getOperand(0).getValueType();
7230       SDValue Zext = Op.getOperand(0);
7231       if (VT.bitsLT(MVT::i32)) {
7232         VT = MVT::i32;
7233         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
7234       }
7235       unsigned Log2b = Log2_32(VT.getSizeInBits());
7236       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
7237       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
7238                                 DAG.getConstant(Log2b, dl, MVT::i32));
7239       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
7240     }
7241   }
7242   return SDValue();
7243 }
7244 
7245 SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
7246   unsigned Opcode = Node->getOpcode();
7247   SDValue LHS = Node->getOperand(0);
7248   SDValue RHS = Node->getOperand(1);
7249   EVT VT = LHS.getValueType();
7250   SDLoc dl(Node);
7251 
7252   assert(VT == RHS.getValueType() && "Expected operands to be the same type");
7253   assert(VT.isInteger() && "Expected operands to be integers");
7254 
7255   // usub.sat(a, b) -> umax(a, b) - b
7256   if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
7257     SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
7258     return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
7259   }
7260 
7261   if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
7262     SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
7263     SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
7264     return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
7265   }
7266 
7267   unsigned OverflowOp;
7268   switch (Opcode) {
7269   case ISD::SADDSAT:
7270     OverflowOp = ISD::SADDO;
7271     break;
7272   case ISD::UADDSAT:
7273     OverflowOp = ISD::UADDO;
7274     break;
7275   case ISD::SSUBSAT:
7276     OverflowOp = ISD::SSUBO;
7277     break;
7278   case ISD::USUBSAT:
7279     OverflowOp = ISD::USUBO;
7280     break;
7281   default:
7282     llvm_unreachable("Expected method to receive signed or unsigned saturation "
7283                      "addition or subtraction node.");
7284   }
7285 
7286   unsigned BitWidth = LHS.getScalarValueSizeInBits();
7287   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7288   SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
7289                                LHS, RHS);
7290   SDValue SumDiff = Result.getValue(0);
7291   SDValue Overflow = Result.getValue(1);
7292   SDValue Zero = DAG.getConstant(0, dl, VT);
7293   SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
7294 
7295   if (Opcode == ISD::UADDSAT) {
7296     if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
7297       // (LHS + RHS) | OverflowMask
7298       SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
7299       return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
7300     }
7301     // Overflow ? 0xffff.... : (LHS + RHS)
7302     return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
7303   } else if (Opcode == ISD::USUBSAT) {
7304     if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
7305       // (LHS - RHS) & ~OverflowMask
7306       SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
7307       SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
7308       return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
7309     }
7310     // Overflow ? 0 : (LHS - RHS)
7311     return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
7312   } else {
7313     // SatMax -> Overflow && SumDiff < 0
7314     // SatMin -> Overflow && SumDiff >= 0
7315     APInt MinVal = APInt::getSignedMinValue(BitWidth);
7316     APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
7317     SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
7318     SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
7319     SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
7320     Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
7321     return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
7322   }
7323 }
7324 
/// Expand a fixed point multiplication ([US]MULFIX or [US]MULFIXSAT) into a
/// double-width multiply (via [SU]MUL_LOHI, MULH[SU], or target-specific
/// handling of Scale == 0), a funnel shift right by the scale, and -- for the
/// saturating variants -- clamping to the type's min/max on overflow.
/// Returns an empty SDValue for vectors that cannot be expanded here.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // smul.fix.sat(a, b, 0) -> smulo(a, b) plus select on saturation bound.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      // On overflow, a negative wrapped product means the true result was
      // positive (saturate to max); otherwise saturate to min.
      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // umul.fix.sat(a, b, 0) -> umulo(a, b), saturating to all-ones.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    // Vectors cannot fall back to report_fatal_error; let the caller handle
    // unrolling/widening instead.
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With Scale == 0 the product must fit in VT itself: overflow occurred
    // iff Hi differs from the sign-extension of Lo's sign bit.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  // We handled Scale == 0 above so all the bits to examine are in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
  // which is the same as if (Hi < (-1 << (Scale - 1)))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
7464 
/// Expand a fixed point division ([US]DIVFIX[SAT]) by pre-shifting the
/// operands (LHS up, RHS down) so that a plain integer division yields the
/// correctly scaled quotient. Returns an empty SDValue when the operands do
/// not have enough known headroom to absorb the scale without losing bits;
/// the caller must then retry in a wider type.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Prefer to shift the LHS up as much as possible; whatever scale remains
  // is taken out of the RHS by shifting it down.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getConstant(LHSShift, dl, ShiftTy));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getConstant(RHSShift, dl, ShiftTy));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    // The quotient is negative exactly when the operand signs differ (XOR of
    // the two sign tests); only then, and only with a non-zero remainder,
    // does truncation round the wrong way.
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
7551 
7552 void TargetLowering::expandUADDSUBO(
7553     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
7554   SDLoc dl(Node);
7555   SDValue LHS = Node->getOperand(0);
7556   SDValue RHS = Node->getOperand(1);
7557   bool IsAdd = Node->getOpcode() == ISD::UADDO;
7558 
7559   // If ADD/SUBCARRY is legal, use that instead.
7560   unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
7561   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
7562     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
7563     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
7564                                     { LHS, RHS, CarryIn });
7565     Result = SDValue(NodeCarry.getNode(), 0);
7566     Overflow = SDValue(NodeCarry.getNode(), 1);
7567     return;
7568   }
7569 
7570   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
7571                             LHS.getValueType(), LHS, RHS);
7572 
7573   EVT ResultType = Node->getValueType(1);
7574   EVT SetCCType = getSetCCResultType(
7575       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
7576   ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
7577   SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
7578   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
7579 }
7580 
7581 void TargetLowering::expandSADDSUBO(
7582     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
7583   SDLoc dl(Node);
7584   SDValue LHS = Node->getOperand(0);
7585   SDValue RHS = Node->getOperand(1);
7586   bool IsAdd = Node->getOpcode() == ISD::SADDO;
7587 
7588   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
7589                             LHS.getValueType(), LHS, RHS);
7590 
7591   EVT ResultType = Node->getValueType(1);
7592   EVT OType = getSetCCResultType(
7593       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
7594 
7595   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
7596   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
7597   if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
7598     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
7599     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
7600     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
7601     return;
7602   }
7603 
7604   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
7605 
7606   // For an addition, the result should be less than one of the operands (LHS)
7607   // if and only if the other operand (RHS) is negative, otherwise there will
7608   // be overflow.
7609   // For a subtraction, the result should be less than one of the operands
7610   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
7611   // otherwise there will be overflow.
7612   SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
7613   SDValue ConditionRHS =
7614       DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
7615 
7616   Overflow = DAG.getBoolExtOrTrunc(
7617       DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
7618       ResultType, ResultType);
7619 }
7620 
/// Expand [SU]MULO into a multiply plus an explicit overflow check. Strategies
/// in order of preference: a shift expansion for power-of-two constant RHS,
/// MULH[SU] / [SU]MUL_LOHI to obtain the top half directly, a multiply in a
/// legal double-width type, or finally a runtime libcall (scalar only).
/// Returns false when no expansion applies (only possible for vectors).
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting back down does not recover the original LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // A type twice as wide as VT, used by the widened-multiply fallbacks below.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                              VT.getVectorNumElements());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Row 0 = unsigned ops, row 1 = signed ops:
  // { high-half multiply, combined lo/hi multiply, widening extension }.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Multiply in the double-width type, then split the product into halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of low
      // part.
      unsigned LoSize = VT.getSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
        HiLHS = DAG.getConstant(0, dl, VT);
        HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus has to be split to
    // two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    // Same as above.
    // NOTE(review): this branches on DataLayout endianness while the argument
    // packing above uses shouldSplitFunctionArgumentsAsLittleEndian(); confirm
    // the two agree for every target that reaches this libcall path.
    if (DAG.getDataLayout().isLittleEndian()) {
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half is not the sign-extension of the
    // bottom half.
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff any bit of the top half is set.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
7764 
7765 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
7766   SDLoc dl(Node);
7767   bool NoNaN = Node->getFlags().hasNoNaNs();
7768   unsigned BaseOpcode = 0;
7769   switch (Node->getOpcode()) {
7770   default: llvm_unreachable("Expected VECREDUCE opcode");
7771   case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
7772   case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
7773   case ISD::VECREDUCE_ADD:  BaseOpcode = ISD::ADD; break;
7774   case ISD::VECREDUCE_MUL:  BaseOpcode = ISD::MUL; break;
7775   case ISD::VECREDUCE_AND:  BaseOpcode = ISD::AND; break;
7776   case ISD::VECREDUCE_OR:   BaseOpcode = ISD::OR; break;
7777   case ISD::VECREDUCE_XOR:  BaseOpcode = ISD::XOR; break;
7778   case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
7779   case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
7780   case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
7781   case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
7782   case ISD::VECREDUCE_FMAX:
7783     BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
7784     break;
7785   case ISD::VECREDUCE_FMIN:
7786     BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
7787     break;
7788   }
7789 
7790   SDValue Op = Node->getOperand(0);
7791   EVT VT = Op.getValueType();
7792 
7793   // Try to use a shuffle reduction for power of two vectors.
7794   if (VT.isPow2VectorType()) {
7795     while (VT.getVectorNumElements() > 1) {
7796       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
7797       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
7798         break;
7799 
7800       SDValue Lo, Hi;
7801       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
7802       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
7803       VT = HalfVT;
7804     }
7805   }
7806 
7807   EVT EltVT = VT.getVectorElementType();
7808   unsigned NumElts = VT.getVectorNumElements();
7809 
7810   SmallVector<SDValue, 8> Ops;
7811   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
7812 
7813   SDValue Res = Ops[0];
7814   for (unsigned i = 1; i < NumElts; i++)
7815     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
7816 
7817   // Result type may be wider than element type.
7818   if (EltVT != Node->getValueType(0))
7819     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
7820   return Res;
7821 }
7822