1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/CodeGen/CallingConvLower.h"
16 #include "llvm/CodeGen/MachineFrameInfo.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineJumpTableInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/SelectionDAG.h"
21 #include "llvm/CodeGen/TargetRegisterInfo.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalVariable.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCExpr.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/KnownBits.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetLoweringObjectFile.h"
33 #include "llvm/Target/TargetMachine.h"
34 #include <cctype>
35 using namespace llvm;
36 
/// NOTE: The TargetMachine owns TLOF.
// Trivial constructor: all interesting state lives in TargetLoweringBase;
// the TargetMachine reference is simply forwarded to the base class.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
40 
/// Default implementation: this target defines no custom ISD node names.
/// Targets with target-specific opcodes override this (used by DAG dumps).
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
44 
45 bool TargetLowering::isPositionIndependent() const {
46   return getTargetMachine().isPositionIndependent();
47 }
48 
49 /// Check whether a given call node is in tail position within its function. If
50 /// so, it sets Chain to the input chain of the tail call.
51 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
52                                           SDValue &Chain) const {
53   const Function &F = DAG.getMachineFunction().getFunction();
54 
55   // Conservatively require the attributes of the call to match those of
56   // the return. Ignore NoAlias and NonNull because they don't affect the
57   // call sequence.
58   AttributeList CallerAttrs = F.getAttributes();
59   if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
60           .removeAttribute(Attribute::NoAlias)
61           .removeAttribute(Attribute::NonNull)
62           .hasAttributes())
63     return false;
64 
65   // It's not safe to eliminate the sign / zero extension of the return value.
66   if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
67       CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
68     return false;
69 
70   // Check if the only use is a function return node.
71   return isUsedByReturnOnly(Node, Chain);
72 }
73 
74 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
75     const uint32_t *CallerPreservedMask,
76     const SmallVectorImpl<CCValAssign> &ArgLocs,
77     const SmallVectorImpl<SDValue> &OutVals) const {
78   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
79     const CCValAssign &ArgLoc = ArgLocs[I];
80     if (!ArgLoc.isRegLoc())
81       continue;
82     Register Reg = ArgLoc.getLocReg();
83     // Only look at callee saved registers.
84     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
85       continue;
86     // Check that we pass the value used for the caller.
87     // (We look for a CopyFromReg reading a virtual register that is used
88     //  for the function live-in value of register Reg)
89     SDValue Value = OutVals[I];
90     if (Value->getOpcode() != ISD::CopyFromReg)
91       return false;
92     unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
93     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
94       return false;
95   }
96   return true;
97 }
98 
99 /// Set CallLoweringInfo attribute flags based on a call instruction
100 /// and called function attributes.
101 void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
102                                                      unsigned ArgIdx) {
103   IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
104   IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
105   IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
106   IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
107   IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
108   IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
109   IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
110   IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
111   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
112   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
113   Alignment = Call->getParamAlignment(ArgIdx);
114   ByValType = nullptr;
115   if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
116     ByValType = Call->getParamByValType(ArgIdx);
117 }
118 
119 /// Generate a libcall taking the given operands as arguments and returning a
120 /// result of type RetVT.
121 std::pair<SDValue, SDValue>
122 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
123                             ArrayRef<SDValue> Ops,
124                             MakeLibCallOptions CallOptions,
125                             const SDLoc &dl,
126                             SDValue InChain) const {
127   if (!InChain)
128     InChain = DAG.getEntryNode();
129 
130   TargetLowering::ArgListTy Args;
131   Args.reserve(Ops.size());
132 
133   TargetLowering::ArgListEntry Entry;
134   for (unsigned i = 0; i < Ops.size(); ++i) {
135     SDValue NewOp = Ops[i];
136     Entry.Node = NewOp;
137     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
138     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
139                                                  CallOptions.IsSExt);
140     Entry.IsZExt = !Entry.IsSExt;
141 
142     if (CallOptions.IsSoften &&
143         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
144       Entry.IsSExt = Entry.IsZExt = false;
145     }
146     Args.push_back(Entry);
147   }
148 
149   if (LC == RTLIB::UNKNOWN_LIBCALL)
150     report_fatal_error("Unsupported library call operation!");
151   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
152                                          getPointerTy(DAG.getDataLayout()));
153 
154   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
155   TargetLowering::CallLoweringInfo CLI(DAG);
156   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
157   bool zeroExtend = !signExtend;
158 
159   if (CallOptions.IsSoften &&
160       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
161     signExtend = zeroExtend = false;
162   }
163 
164   CLI.setDebugLoc(dl)
165       .setChain(InChain)
166       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
167       .setNoReturn(CallOptions.DoesNotReturn)
168       .setDiscardResult(!CallOptions.IsReturnValueUsed)
169       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
170       .setSExtResult(signExtend)
171       .setZExtResult(zeroExtend);
172   return LowerCallTo(CLI);
173 }
174 
/// Choose the sequence of value types to use when lowering a memory operation
/// (memset / memcpy / memmove) of 'Size' bytes, appending one EVT per
/// load/store to MemOps. Returns false if more than 'Limit' operations would
/// be required, or if the alignment precondition fails.
bool
TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                         unsigned Limit, uint64_t Size,
                                         unsigned DstAlign, unsigned SrcAlign,
                                         bool IsMemset,
                                         bool ZeroMemset,
                                         bool MemcpyStrSrc,
                                         bool AllowOverlap,
                                         unsigned DstAS, unsigned SrcAS,
                                         const AttributeList &FuncAttributes) const {
  // If 'SrcAlign' is zero, that means the memory operation does not need to
  // load the value, i.e. memset or memcpy from constant string. Otherwise,
  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
  // is the specified alignment of the memory operation. If it is zero, that
  // means it's possible to change the alignment of the destination.
  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
  // not need to be loaded.
  if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
    return false;

  // Let the target pick its preferred type first.
  EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
                               IsMemset, ZeroMemset, MemcpyStrSrc,
                               FuncAttributes);

  if (VT == MVT::Other) {
    // The target has no preference; pick a default ourselves.
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    // Step down through the MVT simple-type enum until the alignment
    // constraint is met or misaligned access is allowed and fast.
    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
           !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily emit operations of the current type, shrinking the type (or
  // overlapping the last access) to cover the tail.
  unsigned NumMemOps = 0;
  while (Size != 0) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        // Drop to a plain integer type of at most the same width.
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Otherwise step down to the next smaller simple integer type,
        // stopping at i8 (the smallest byte-sized unit).
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
                                         MachineMemOperand::MONone, &Fast) &&
          Fast)
        // Keep the wide type; the access will overlap the previous one.
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
276 
277 /// Soften the operands of a comparison. This code is shared among BR_CC,
278 /// SELECT_CC, and SETCC handlers.
279 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
280                                          SDValue &NewLHS, SDValue &NewRHS,
281                                          ISD::CondCode &CCCode,
282                                          const SDLoc &dl, const SDValue OldLHS,
283                                          const SDValue OldRHS) const {
284   assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
285          && "Unsupported setcc type!");
286 
287   // Expand into one or more soft-fp libcall(s).
288   RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
289   bool ShouldInvertCC = false;
290   switch (CCCode) {
291   case ISD::SETEQ:
292   case ISD::SETOEQ:
293     LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
294           (VT == MVT::f64) ? RTLIB::OEQ_F64 :
295           (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
296     break;
297   case ISD::SETNE:
298   case ISD::SETUNE:
299     LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
300           (VT == MVT::f64) ? RTLIB::UNE_F64 :
301           (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
302     break;
303   case ISD::SETGE:
304   case ISD::SETOGE:
305     LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
306           (VT == MVT::f64) ? RTLIB::OGE_F64 :
307           (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
308     break;
309   case ISD::SETLT:
310   case ISD::SETOLT:
311     LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
312           (VT == MVT::f64) ? RTLIB::OLT_F64 :
313           (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
314     break;
315   case ISD::SETLE:
316   case ISD::SETOLE:
317     LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
318           (VT == MVT::f64) ? RTLIB::OLE_F64 :
319           (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
320     break;
321   case ISD::SETGT:
322   case ISD::SETOGT:
323     LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
324           (VT == MVT::f64) ? RTLIB::OGT_F64 :
325           (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
326     break;
327   case ISD::SETUO:
328     LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
329           (VT == MVT::f64) ? RTLIB::UO_F64 :
330           (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
331     break;
332   case ISD::SETO:
333     LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
334           (VT == MVT::f64) ? RTLIB::O_F64 :
335           (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
336     break;
337   case ISD::SETONE:
338     // SETONE = SETOLT | SETOGT
339     LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
340           (VT == MVT::f64) ? RTLIB::OLT_F64 :
341           (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
342     LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
343           (VT == MVT::f64) ? RTLIB::OGT_F64 :
344           (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
345     break;
346   case ISD::SETUEQ:
347     LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
348           (VT == MVT::f64) ? RTLIB::UO_F64 :
349           (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
350     LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
351           (VT == MVT::f64) ? RTLIB::OEQ_F64 :
352           (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
353     break;
354   default:
355     // Invert CC for unordered comparisons
356     ShouldInvertCC = true;
357     switch (CCCode) {
358     case ISD::SETULT:
359       LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
360             (VT == MVT::f64) ? RTLIB::OGE_F64 :
361             (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
362       break;
363     case ISD::SETULE:
364       LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
365             (VT == MVT::f64) ? RTLIB::OGT_F64 :
366             (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
367       break;
368     case ISD::SETUGT:
369       LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
370             (VT == MVT::f64) ? RTLIB::OLE_F64 :
371             (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
372       break;
373     case ISD::SETUGE:
374       LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
375             (VT == MVT::f64) ? RTLIB::OLT_F64 :
376             (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
377       break;
378     default: llvm_unreachable("Do not know how to soften this setcc!");
379     }
380   }
381 
382   // Use the target specific return value for comparions lib calls.
383   EVT RetVT = getCmpLibcallReturnType();
384   SDValue Ops[2] = {NewLHS, NewRHS};
385   TargetLowering::MakeLibCallOptions CallOptions;
386   EVT OpsVT[2] = { OldLHS.getValueType(),
387                    OldRHS.getValueType() };
388   CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
389   NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
390   NewRHS = DAG.getConstant(0, dl, RetVT);
391 
392   CCCode = getCmpLibcallCC(LC1);
393   if (ShouldInvertCC)
394     CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
395 
396   if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
397     SDValue Tmp = DAG.getNode(
398         ISD::SETCC, dl,
399         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
400         NewLHS, NewRHS, DAG.getCondCode(CCCode));
401     NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
402     NewLHS = DAG.getNode(
403         ISD::SETCC, dl,
404         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
405         NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
406     NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
407     NewRHS = SDValue();
408   }
409 }
410 
411 /// Return the entry encoding for a jump table in the current function. The
412 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
413 unsigned TargetLowering::getJumpTableEncoding() const {
414   // In non-pic modes, just use the address of a block.
415   if (!isPositionIndependent())
416     return MachineJumpTableInfo::EK_BlockAddress;
417 
418   // In PIC mode, if the target supports a GPRel32 directive, use it.
419   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
420     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
421 
422   // Otherwise, use a label difference.
423   return MachineJumpTableInfo::EK_LabelDifference32;
424 }
425 
426 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
427                                                  SelectionDAG &DAG) const {
428   // If our PIC model is GP relative, use the global offset table as the base.
429   unsigned JTEncoding = getJumpTableEncoding();
430 
431   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
432       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
433     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
434 
435   return Table;
436 }
437 
438 /// This returns the relocation base for the given PIC jumptable, the same as
439 /// getPICJumpTableRelocBase, but as an MCExpr.
440 const MCExpr *
441 TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
442                                              unsigned JTI,MCContext &Ctx) const{
443   // The normal PIC reloc base is the label at the start of the jump table.
444   return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
445 }
446 
447 bool
448 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
449   const TargetMachine &TM = getTargetMachine();
450   const GlobalValue *GV = GA->getGlobal();
451 
452   // If the address is not even local to this DSO we will have to load it from
453   // a got and then add the offset.
454   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
455     return false;
456 
457   // If the code is position independent we will have to add a base register.
458   if (isPositionIndependent())
459     return false;
460 
461   // Otherwise we can do it.
462   return true;
463 }
464 
465 //===----------------------------------------------------------------------===//
466 //  Optimization Methods
467 //===----------------------------------------------------------------------===//
468 
469 /// If the specified instruction has a constant integer operand and there are
470 /// bits set in that constant that are not demanded, then clear those bits and
471 /// return true.
472 bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
473                                             TargetLoweringOpt &TLO) const {
474   SDLoc DL(Op);
475   unsigned Opcode = Op.getOpcode();
476 
477   // Do target-specific constant optimization.
478   if (targetShrinkDemandedConstant(Op, Demanded, TLO))
479     return TLO.New.getNode();
480 
481   // FIXME: ISD::SELECT, ISD::SELECT_CC
482   switch (Opcode) {
483   default:
484     break;
485   case ISD::XOR:
486   case ISD::AND:
487   case ISD::OR: {
488     auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
489     if (!Op1C)
490       return false;
491 
492     // If this is a 'not' op, don't touch it because that's a canonical form.
493     const APInt &C = Op1C->getAPIntValue();
494     if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
495       return false;
496 
497     if (!C.isSubsetOf(Demanded)) {
498       EVT VT = Op.getValueType();
499       SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
500       SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
501       return TLO.CombineTo(Op, NewOp);
502     }
503 
504     break;
505   }
506   }
507 
508   return false;
509 }
510 
511 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
512 /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
513 /// generalized for targets with other types of implicit widening casts.
514 bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
515                                       const APInt &Demanded,
516                                       TargetLoweringOpt &TLO) const {
517   assert(Op.getNumOperands() == 2 &&
518          "ShrinkDemandedOp only supports binary operators!");
519   assert(Op.getNode()->getNumValues() == 1 &&
520          "ShrinkDemandedOp only supports nodes with one result!");
521 
522   SelectionDAG &DAG = TLO.DAG;
523   SDLoc dl(Op);
524 
525   // Early return, as this function cannot handle vector types.
526   if (Op.getValueType().isVector())
527     return false;
528 
529   // Don't do this if the node has another user, which may require the
530   // full value.
531   if (!Op.getNode()->hasOneUse())
532     return false;
533 
534   // Search for the smallest integer type with free casts to and from
535   // Op's type. For expedience, just check power-of-2 integer types.
536   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
537   unsigned DemandedSize = Demanded.getActiveBits();
538   unsigned SmallVTBits = DemandedSize;
539   if (!isPowerOf2_32(SmallVTBits))
540     SmallVTBits = NextPowerOf2(SmallVTBits);
541   for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
542     EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
543     if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
544         TLI.isZExtFree(SmallVT, Op.getValueType())) {
545       // We found a type with free casts.
546       SDValue X = DAG.getNode(
547           Op.getOpcode(), dl, SmallVT,
548           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
549           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
550       assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
551       SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
552       return TLO.CombineTo(Op, Z);
553     }
554   }
555   return false;
556 }
557 
558 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
559                                           DAGCombinerInfo &DCI) const {
560   SelectionDAG &DAG = DCI.DAG;
561   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
562                         !DCI.isBeforeLegalizeOps());
563   KnownBits Known;
564 
565   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
566   if (Simplified) {
567     DCI.AddToWorklist(Op.getNode());
568     DCI.CommitTargetLoweringOpt(TLO);
569   }
570   return Simplified;
571 }
572 
573 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
574                                           KnownBits &Known,
575                                           TargetLoweringOpt &TLO,
576                                           unsigned Depth,
577                                           bool AssumeSingleUse) const {
578   EVT VT = Op.getValueType();
579   APInt DemandedElts = VT.isVector()
580                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
581                            : APInt(1, 1);
582   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
583                               AssumeSingleUse);
584 }
585 
// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
/// Try to return an existing value that is equivalent to Op for the demanded
/// bits/elements, allowing callers to bypass Op even when Op has other users.
/// Only bitcasts/UNDEFs of existing values are created; returns a null
/// SDValue when no simplification is found.
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(Op.getValueType());

  unsigned NumElts = DemandedElts.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();

    // Same element width: the demanded masks carry over unchanged.
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    // Narrow src elts -> wide dst elts: each dst element covers 'Scale'
    // consecutive src elements; translate the demanded dst masks onto them.
    // TODO - bigendian once we have test coverage.
    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned Offset = i * NumSrcEltBits;
        // Bits of the wide element that map onto the i'th narrow sub-element.
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
        if (!Sub.isNullValue()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // Wide src elts -> narrow dst elts: 'Scale' dst elements live inside each
    // src element; widen the demanded masks accordingly.
    // TODO - bigendian once we have test coverage.
    if ((NumSrcEltBits % NumDstEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.  These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
      return Op.getOperand(0);
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // If we don't demand the inserted element, return the base vector.
    // (Only safe for a constant in-range insertion index.)
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      // NB: M - NumElts is an unsigned subtraction here; for M < NumElts it
      // wraps to a huge value and the compare fails, which is correct.
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // Give targets a chance to simplify their own nodes.
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
749 
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, recording the
/// original and new nodes in TLO's Old and New members. Otherwise, analyze the
/// expression and return a mask of Known bits for the expression (used to
/// simplify the caller). The Known bits may only be accurate for those bits in
/// the OriginalDemandedBits and OriginalDemandedElts.
757 bool TargetLowering::SimplifyDemandedBits(
758     SDValue Op, const APInt &OriginalDemandedBits,
759     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
760     unsigned Depth, bool AssumeSingleUse) const {
761   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
762   assert(Op.getScalarValueSizeInBits() == BitWidth &&
763          "Mask size mismatches value type size!");
764 
765   unsigned NumElts = OriginalDemandedElts.getBitWidth();
766   assert((!Op.getValueType().isVector() ||
767           NumElts == Op.getValueType().getVectorNumElements()) &&
768          "Unexpected vector size");
769 
770   APInt DemandedBits = OriginalDemandedBits;
771   APInt DemandedElts = OriginalDemandedElts;
772   SDLoc dl(Op);
773   auto &DL = TLO.DAG.getDataLayout();
774 
775   // Don't know anything.
776   Known = KnownBits(BitWidth);
777 
778   // Undef operand.
779   if (Op.isUndef())
780     return false;
781 
782   if (Op.getOpcode() == ISD::Constant) {
783     // We know all of the bits for a constant!
784     Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
785     Known.Zero = ~Known.One;
786     return false;
787   }
788 
789   // Other users may use these bits.
790   EVT VT = Op.getValueType();
791   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
792     if (Depth != 0) {
793       // If not at the root, Just compute the Known bits to
794       // simplify things downstream.
795       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
796       return false;
797     }
798     // If this is the root being simplified, allow it to have multiple uses,
799     // just set the DemandedBits/Elts to all bits.
800     DemandedBits = APInt::getAllOnesValue(BitWidth);
801     DemandedElts = APInt::getAllOnesValue(NumElts);
802   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
803     // Not demanding any bits/elts from Op.
804     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
805   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
806     // Limit search depth.
807     return false;
808   }
809 
810   KnownBits Known2, KnownOut;
811   switch (Op.getOpcode()) {
812   case ISD::TargetConstant:
813     llvm_unreachable("Can't simplify this node");
814   case ISD::SCALAR_TO_VECTOR: {
815     if (!DemandedElts[0])
816       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
817 
818     KnownBits SrcKnown;
819     SDValue Src = Op.getOperand(0);
820     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
821     APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
822     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
823       return true;
824     Known = SrcKnown.zextOrTrunc(BitWidth, false);
825     break;
826   }
827   case ISD::BUILD_VECTOR:
828     // Collect the known bits that are shared by every demanded element.
829     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
830     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
831     return false; // Don't fall through, will infinitely loop.
832   case ISD::LOAD: {
833     LoadSDNode *LD = cast<LoadSDNode>(Op);
834     if (getTargetConstantFromLoad(LD)) {
835       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
836       return false; // Don't fall through, will infinitely loop.
837     }
838     break;
839   }
840   case ISD::INSERT_VECTOR_ELT: {
841     SDValue Vec = Op.getOperand(0);
842     SDValue Scl = Op.getOperand(1);
843     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
844     EVT VecVT = Vec.getValueType();
845 
846     // If index isn't constant, assume we need all vector elements AND the
847     // inserted element.
848     APInt DemandedVecElts(DemandedElts);
849     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
850       unsigned Idx = CIdx->getZExtValue();
851       DemandedVecElts.clearBit(Idx);
852 
853       // Inserted element is not required.
854       if (!DemandedElts[Idx])
855         return TLO.CombineTo(Op, Vec);
856     }
857 
858     KnownBits KnownScl;
859     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
860     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
861     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
862       return true;
863 
864     Known = KnownScl.zextOrTrunc(BitWidth, false);
865 
866     KnownBits KnownVec;
867     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
868                              Depth + 1))
869       return true;
870 
871     if (!!DemandedVecElts) {
872       Known.One &= KnownVec.One;
873       Known.Zero &= KnownVec.Zero;
874     }
875 
876     return false;
877   }
878   case ISD::INSERT_SUBVECTOR: {
879     SDValue Base = Op.getOperand(0);
880     SDValue Sub = Op.getOperand(1);
881     EVT SubVT = Sub.getValueType();
882     unsigned NumSubElts = SubVT.getVectorNumElements();
883 
884     // If index isn't constant, assume we need the original demanded base
885     // elements and ALL the inserted subvector elements.
886     APInt BaseElts = DemandedElts;
887     APInt SubElts = APInt::getAllOnesValue(NumSubElts);
888     if (isa<ConstantSDNode>(Op.getOperand(2))) {
889       const APInt &Idx = Op.getConstantOperandAPInt(2);
890       if (Idx.ule(NumElts - NumSubElts)) {
891         unsigned SubIdx = Idx.getZExtValue();
892         SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
893         BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
894       }
895     }
896 
897     KnownBits KnownSub, KnownBase;
898     if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
899                              Depth + 1))
900       return true;
901     if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
902                              Depth + 1))
903       return true;
904 
905     Known.Zero.setAllBits();
906     Known.One.setAllBits();
907     if (!!SubElts) {
908         Known.One &= KnownSub.One;
909         Known.Zero &= KnownSub.Zero;
910     }
911     if (!!BaseElts) {
912         Known.One &= KnownBase.One;
913         Known.Zero &= KnownBase.Zero;
914     }
915     break;
916   }
917   case ISD::EXTRACT_SUBVECTOR: {
918     // If index isn't constant, assume we need all the source vector elements.
919     SDValue Src = Op.getOperand(0);
920     ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
921     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
922     APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
923     if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
924       // Offset the demanded elts by the subvector index.
925       uint64_t Idx = SubIdx->getZExtValue();
926       SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
927     }
928     if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
929       return true;
930     break;
931   }
932   case ISD::CONCAT_VECTORS: {
933     Known.Zero.setAllBits();
934     Known.One.setAllBits();
935     EVT SubVT = Op.getOperand(0).getValueType();
936     unsigned NumSubVecs = Op.getNumOperands();
937     unsigned NumSubElts = SubVT.getVectorNumElements();
938     for (unsigned i = 0; i != NumSubVecs; ++i) {
939       APInt DemandedSubElts =
940           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
941       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
942                                Known2, TLO, Depth + 1))
943         return true;
944       // Known bits are shared by every demanded subvector element.
945       if (!!DemandedSubElts) {
946         Known.One &= Known2.One;
947         Known.Zero &= Known2.Zero;
948       }
949     }
950     break;
951   }
952   case ISD::VECTOR_SHUFFLE: {
953     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
954 
955     // Collect demanded elements from shuffle operands..
956     APInt DemandedLHS(NumElts, 0);
957     APInt DemandedRHS(NumElts, 0);
958     for (unsigned i = 0; i != NumElts; ++i) {
959       if (!DemandedElts[i])
960         continue;
961       int M = ShuffleMask[i];
962       if (M < 0) {
963         // For UNDEF elements, we don't know anything about the common state of
964         // the shuffle result.
965         DemandedLHS.clearAllBits();
966         DemandedRHS.clearAllBits();
967         break;
968       }
969       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
970       if (M < (int)NumElts)
971         DemandedLHS.setBit(M);
972       else
973         DemandedRHS.setBit(M - NumElts);
974     }
975 
976     if (!!DemandedLHS || !!DemandedRHS) {
977       SDValue Op0 = Op.getOperand(0);
978       SDValue Op1 = Op.getOperand(1);
979 
980       Known.Zero.setAllBits();
981       Known.One.setAllBits();
982       if (!!DemandedLHS) {
983         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
984                                  Depth + 1))
985           return true;
986         Known.One &= Known2.One;
987         Known.Zero &= Known2.Zero;
988       }
989       if (!!DemandedRHS) {
990         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
991                                  Depth + 1))
992           return true;
993         Known.One &= Known2.One;
994         Known.Zero &= Known2.Zero;
995       }
996 
997       // Attempt to avoid multi-use ops if we don't need anything from them.
998       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
999           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1000       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1001           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1002       if (DemandedOp0 || DemandedOp1) {
1003         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1004         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1005         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1006         return TLO.CombineTo(Op, NewOp);
1007       }
1008     }
1009     break;
1010   }
1011   case ISD::AND: {
1012     SDValue Op0 = Op.getOperand(0);
1013     SDValue Op1 = Op.getOperand(1);
1014 
1015     // If the RHS is a constant, check to see if the LHS would be zero without
1016     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1017     // simplify the LHS, here we're using information from the LHS to simplify
1018     // the RHS.
1019     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1020       // Do not increment Depth here; that can cause an infinite loop.
1021       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1022       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1023       if ((LHSKnown.Zero & DemandedBits) ==
1024           (~RHSC->getAPIntValue() & DemandedBits))
1025         return TLO.CombineTo(Op, Op0);
1026 
1027       // If any of the set bits in the RHS are known zero on the LHS, shrink
1028       // the constant.
1029       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
1030         return true;
1031 
1032       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1033       // constant, but if this 'and' is only clearing bits that were just set by
1034       // the xor, then this 'and' can be eliminated by shrinking the mask of
1035       // the xor. For example, for a 32-bit X:
1036       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1037       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1038           LHSKnown.One == ~RHSC->getAPIntValue()) {
1039         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1040         return TLO.CombineTo(Op, Xor);
1041       }
1042     }
1043 
1044     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1045                              Depth + 1))
1046       return true;
1047     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1048     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1049                              Known2, TLO, Depth + 1))
1050       return true;
1051     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1052 
1053     // Attempt to avoid multi-use ops if we don't need anything from them.
1054     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1055       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1056           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1057       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1058           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1059       if (DemandedOp0 || DemandedOp1) {
1060         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1061         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1062         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1063         return TLO.CombineTo(Op, NewOp);
1064       }
1065     }
1066 
1067     // If all of the demanded bits are known one on one side, return the other.
1068     // These bits cannot contribute to the result of the 'and'.
1069     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1070       return TLO.CombineTo(Op, Op0);
1071     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1072       return TLO.CombineTo(Op, Op1);
1073     // If all of the demanded bits in the inputs are known zeros, return zero.
1074     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1075       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1076     // If the RHS is a constant, see if we can simplify it.
1077     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
1078       return true;
1079     // If the operation can be done in a smaller type, do so.
1080     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1081       return true;
1082 
1083     // Output known-1 bits are only known if set in both the LHS & RHS.
1084     Known.One &= Known2.One;
1085     // Output known-0 are known to be clear if zero in either the LHS | RHS.
1086     Known.Zero |= Known2.Zero;
1087     break;
1088   }
1089   case ISD::OR: {
1090     SDValue Op0 = Op.getOperand(0);
1091     SDValue Op1 = Op.getOperand(1);
1092 
1093     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1094                              Depth + 1))
1095       return true;
1096     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1097     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1098                              Known2, TLO, Depth + 1))
1099       return true;
1100     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1101 
1102     // Attempt to avoid multi-use ops if we don't need anything from them.
1103     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1104       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1105           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1106       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1107           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1108       if (DemandedOp0 || DemandedOp1) {
1109         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1110         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1111         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1112         return TLO.CombineTo(Op, NewOp);
1113       }
1114     }
1115 
1116     // If all of the demanded bits are known zero on one side, return the other.
1117     // These bits cannot contribute to the result of the 'or'.
1118     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1119       return TLO.CombineTo(Op, Op0);
1120     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1121       return TLO.CombineTo(Op, Op1);
1122     // If the RHS is a constant, see if we can simplify it.
1123     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1124       return true;
1125     // If the operation can be done in a smaller type, do so.
1126     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1127       return true;
1128 
1129     // Output known-0 bits are only known if clear in both the LHS & RHS.
1130     Known.Zero &= Known2.Zero;
1131     // Output known-1 are known to be set if set in either the LHS | RHS.
1132     Known.One |= Known2.One;
1133     break;
1134   }
1135   case ISD::XOR: {
1136     SDValue Op0 = Op.getOperand(0);
1137     SDValue Op1 = Op.getOperand(1);
1138 
1139     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1140                              Depth + 1))
1141       return true;
1142     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1143     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1144                              Depth + 1))
1145       return true;
1146     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1147 
1148     // Attempt to avoid multi-use ops if we don't need anything from them.
1149     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1150       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1151           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1152       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1153           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1154       if (DemandedOp0 || DemandedOp1) {
1155         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1156         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1157         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1158         return TLO.CombineTo(Op, NewOp);
1159       }
1160     }
1161 
1162     // If all of the demanded bits are known zero on one side, return the other.
1163     // These bits cannot contribute to the result of the 'xor'.
1164     if (DemandedBits.isSubsetOf(Known.Zero))
1165       return TLO.CombineTo(Op, Op0);
1166     if (DemandedBits.isSubsetOf(Known2.Zero))
1167       return TLO.CombineTo(Op, Op1);
1168     // If the operation can be done in a smaller type, do so.
1169     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1170       return true;
1171 
1172     // If all of the unknown bits are known to be zero on one side or the other
1173     // (but not both) turn this into an *inclusive* or.
1174     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1175     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1176       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1177 
1178     // Output known-0 bits are known if clear or set in both the LHS & RHS.
1179     KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
1180     // Output known-1 are known to be set if set in only one of the LHS, RHS.
1181     KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
1182 
1183     if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
1184       // If one side is a constant, and all of the known set bits on the other
1185       // side are also set in the constant, turn this into an AND, as we know
1186       // the bits will be cleared.
1187       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1188       // NB: it is okay if more bits are known than are requested
1189       if (C->getAPIntValue() == Known2.One) {
1190         SDValue ANDC =
1191             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1192         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1193       }
1194 
1195       // If the RHS is a constant, see if we can change it. Don't alter a -1
1196       // constant because that's a 'not' op, and that is better for combining
1197       // and codegen.
1198       if (!C->isAllOnesValue()) {
1199         if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
1200           // We're flipping all demanded bits. Flip the undemanded bits too.
1201           SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1202           return TLO.CombineTo(Op, New);
1203         }
1204         // If we can't turn this into a 'not', try to shrink the constant.
1205         if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1206           return true;
1207       }
1208     }
1209 
1210     Known = std::move(KnownOut);
1211     break;
1212   }
1213   case ISD::SELECT:
1214     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1215                              Depth + 1))
1216       return true;
1217     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1218                              Depth + 1))
1219       return true;
1220     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1221     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1222 
1223     // If the operands are constants, see if we can simplify them.
1224     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1225       return true;
1226 
1227     // Only known if known in both the LHS and RHS.
1228     Known.One &= Known2.One;
1229     Known.Zero &= Known2.Zero;
1230     break;
1231   case ISD::SELECT_CC:
1232     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1233                              Depth + 1))
1234       return true;
1235     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1236                              Depth + 1))
1237       return true;
1238     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1239     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1240 
1241     // If the operands are constants, see if we can simplify them.
1242     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1243       return true;
1244 
1245     // Only known if known in both the LHS and RHS.
1246     Known.One &= Known2.One;
1247     Known.Zero &= Known2.Zero;
1248     break;
1249   case ISD::SETCC: {
1250     SDValue Op0 = Op.getOperand(0);
1251     SDValue Op1 = Op.getOperand(1);
1252     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1253     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1254     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1255     // -1, we may be able to bypass the setcc.
1256     if (DemandedBits.isSignMask() &&
1257         Op0.getScalarValueSizeInBits() == BitWidth &&
1258         getBooleanContents(VT) ==
1259             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1260       // If we're testing X < 0, then this compare isn't needed - just use X!
1261       // FIXME: We're limiting to integer types here, but this should also work
1262       // if we don't care about FP signed-zero. The use of SETLT with FP means
1263       // that we don't care about NaNs.
1264       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1265           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1266         return TLO.CombineTo(Op, Op0);
1267 
1268       // TODO: Should we check for other forms of sign-bit comparisons?
1269       // Examples: X <= -1, X >= 0
1270     }
1271     if (getBooleanContents(Op0.getValueType()) ==
1272             TargetLowering::ZeroOrOneBooleanContent &&
1273         BitWidth > 1)
1274       Known.Zero.setBitsFrom(1);
1275     break;
1276   }
1277   case ISD::SHL: {
1278     SDValue Op0 = Op.getOperand(0);
1279     SDValue Op1 = Op.getOperand(1);
1280 
1281     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1282       // If the shift count is an invalid immediate, don't do anything.
1283       if (SA->getAPIntValue().uge(BitWidth))
1284         break;
1285 
1286       unsigned ShAmt = SA->getZExtValue();
1287       if (ShAmt == 0)
1288         return TLO.CombineTo(Op, Op0);
1289 
1290       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1291       // single shift.  We can do this if the bottom bits (which are shifted
1292       // out) are never demanded.
1293       // TODO - support non-uniform vector amounts.
1294       if (Op0.getOpcode() == ISD::SRL) {
1295         if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1296           if (ConstantSDNode *SA2 =
1297                   isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1298             if (SA2->getAPIntValue().ult(BitWidth)) {
1299               unsigned C1 = SA2->getZExtValue();
1300               unsigned Opc = ISD::SHL;
1301               int Diff = ShAmt - C1;
1302               if (Diff < 0) {
1303                 Diff = -Diff;
1304                 Opc = ISD::SRL;
1305               }
1306 
1307               SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
1308               return TLO.CombineTo(
1309                   Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1310             }
1311           }
1312         }
1313       }
1314 
1315       if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
1316                                Known, TLO, Depth + 1))
1317         return true;
1318 
1319       // Try shrinking the operation as long as the shift amount will still be
1320       // in range.
1321       if ((ShAmt < DemandedBits.getActiveBits()) &&
1322           ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1323         return true;
1324 
1325       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1326       // are not demanded. This will likely allow the anyext to be folded away.
1327       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1328         SDValue InnerOp = Op0.getOperand(0);
1329         EVT InnerVT = InnerOp.getValueType();
1330         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1331         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1332             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1333           EVT ShTy = getShiftAmountTy(InnerVT, DL);
1334           if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1335             ShTy = InnerVT;
1336           SDValue NarrowShl =
1337               TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1338                               TLO.DAG.getConstant(ShAmt, dl, ShTy));
1339           return TLO.CombineTo(
1340               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1341         }
1342         // Repeat the SHL optimization above in cases where an extension
1343         // intervenes: (shl (anyext (shr x, c1)), c2) to
1344         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1345         // aren't demanded (as above) and that the shifted upper c1 bits of
1346         // x aren't demanded.
1347         if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
1348             InnerOp.hasOneUse()) {
1349           if (ConstantSDNode *SA2 =
1350                   isConstOrConstSplat(InnerOp.getOperand(1))) {
1351             unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
1352             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1353                 DemandedBits.getActiveBits() <=
1354                     (InnerBits - InnerShAmt + ShAmt) &&
1355                 DemandedBits.countTrailingZeros() >= ShAmt) {
1356               SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
1357                                                   Op1.getValueType());
1358               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1359                                                InnerOp.getOperand(0));
1360               return TLO.CombineTo(
1361                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1362             }
1363           }
1364         }
1365       }
1366 
1367       Known.Zero <<= ShAmt;
1368       Known.One <<= ShAmt;
1369       // low bits known zero.
1370       Known.Zero.setLowBits(ShAmt);
1371     }
1372     break;
1373   }
1374   case ISD::SRL: {
1375     SDValue Op0 = Op.getOperand(0);
1376     SDValue Op1 = Op.getOperand(1);
1377 
1378     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1379       // If the shift count is an invalid immediate, don't do anything.
1380       if (SA->getAPIntValue().uge(BitWidth))
1381         break;
1382 
1383       unsigned ShAmt = SA->getZExtValue();
1384       if (ShAmt == 0)
1385         return TLO.CombineTo(Op, Op0);
1386 
1387       EVT ShiftVT = Op1.getValueType();
1388       APInt InDemandedMask = (DemandedBits << ShAmt);
1389 
1390       // If the shift is exact, then it does demand the low bits (and knows that
1391       // they are zero).
1392       if (Op->getFlags().hasExact())
1393         InDemandedMask.setLowBits(ShAmt);
1394 
1395       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1396       // single shift.  We can do this if the top bits (which are shifted out)
1397       // are never demanded.
1398       // TODO - support non-uniform vector amounts.
1399       if (Op0.getOpcode() == ISD::SHL) {
1400         if (ConstantSDNode *SA2 =
1401                 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1402           if (!DemandedBits.intersects(
1403                   APInt::getHighBitsSet(BitWidth, ShAmt))) {
1404             if (SA2->getAPIntValue().ult(BitWidth)) {
1405               unsigned C1 = SA2->getZExtValue();
1406               unsigned Opc = ISD::SRL;
1407               int Diff = ShAmt - C1;
1408               if (Diff < 0) {
1409                 Diff = -Diff;
1410                 Opc = ISD::SHL;
1411               }
1412 
1413               SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1414               return TLO.CombineTo(
1415                   Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1416             }
1417           }
1418         }
1419       }
1420 
1421       // Compute the new bits that are at the top now.
1422       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1423                                Depth + 1))
1424         return true;
1425       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1426       Known.Zero.lshrInPlace(ShAmt);
1427       Known.One.lshrInPlace(ShAmt);
1428 
1429       Known.Zero.setHighBits(ShAmt); // High bits known zero.
1430     }
1431     break;
1432   }
1433   case ISD::SRA: {
1434     SDValue Op0 = Op.getOperand(0);
1435     SDValue Op1 = Op.getOperand(1);
1436 
1437     // If this is an arithmetic shift right and only the low-bit is set, we can
1438     // always convert this into a logical shr, even if the shift amount is
1439     // variable.  The low bit of the shift cannot be an input sign bit unless
1440     // the shift amount is >= the size of the datatype, which is undefined.
1441     if (DemandedBits.isOneValue())
1442       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1443 
1444     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1445       // If the shift count is an invalid immediate, don't do anything.
1446       if (SA->getAPIntValue().uge(BitWidth))
1447         break;
1448 
1449       unsigned ShAmt = SA->getZExtValue();
1450       if (ShAmt == 0)
1451         return TLO.CombineTo(Op, Op0);
1452 
1453       APInt InDemandedMask = (DemandedBits << ShAmt);
1454 
1455       // If the shift is exact, then it does demand the low bits (and knows that
1456       // they are zero).
1457       if (Op->getFlags().hasExact())
1458         InDemandedMask.setLowBits(ShAmt);
1459 
1460       // If any of the demanded bits are produced by the sign extension, we also
1461       // demand the input sign bit.
1462       if (DemandedBits.countLeadingZeros() < ShAmt)
1463         InDemandedMask.setSignBit();
1464 
1465       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1466                                Depth + 1))
1467         return true;
1468       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1469       Known.Zero.lshrInPlace(ShAmt);
1470       Known.One.lshrInPlace(ShAmt);
1471 
1472       // If the input sign bit is known to be zero, or if none of the top bits
1473       // are demanded, turn this into an unsigned shift right.
1474       if (Known.Zero[BitWidth - ShAmt - 1] ||
1475           DemandedBits.countLeadingZeros() >= ShAmt) {
1476         SDNodeFlags Flags;
1477         Flags.setExact(Op->getFlags().hasExact());
1478         return TLO.CombineTo(
1479             Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1480       }
1481 
1482       int Log2 = DemandedBits.exactLogBase2();
1483       if (Log2 >= 0) {
1484         // The bit must come from the sign.
1485         SDValue NewSA =
1486             TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
1487         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1488       }
1489 
1490       if (Known.One[BitWidth - ShAmt - 1])
1491         // New bits are known one.
1492         Known.One.setHighBits(ShAmt);
1493     }
1494     break;
1495   }
1496   case ISD::FSHL:
1497   case ISD::FSHR: {
1498     SDValue Op0 = Op.getOperand(0);
1499     SDValue Op1 = Op.getOperand(1);
1500     SDValue Op2 = Op.getOperand(2);
1501     bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1502 
1503     if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1504       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1505 
1506       // For fshl, 0-shift returns the 1st arg.
1507       // For fshr, 0-shift returns the 2nd arg.
1508       if (Amt == 0) {
1509         if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1510                                  Known, TLO, Depth + 1))
1511           return true;
1512         break;
1513       }
1514 
1515       // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1516       // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
1517       APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1518       APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1519       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1520                                Depth + 1))
1521         return true;
1522       if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1523                                Depth + 1))
1524         return true;
1525 
1526       Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
1527       Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
1528       Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1529       Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1530       Known.One |= Known2.One;
1531       Known.Zero |= Known2.Zero;
1532     }
1533     break;
1534   }
1535   case ISD::BITREVERSE: {
1536     SDValue Src = Op.getOperand(0);
1537     APInt DemandedSrcBits = DemandedBits.reverseBits();
1538     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1539                              Depth + 1))
1540       return true;
1541     Known.One = Known2.One.reverseBits();
1542     Known.Zero = Known2.Zero.reverseBits();
1543     break;
1544   }
1545   case ISD::SIGN_EXTEND_INREG: {
1546     SDValue Op0 = Op.getOperand(0);
1547     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1548     unsigned ExVTBits = ExVT.getScalarSizeInBits();
1549 
1550     // If we only care about the highest bit, don't bother shifting right.
1551     if (DemandedBits.isSignMask()) {
1552       unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
1553       bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
1554       // However if the input is already sign extended we expect the sign
1555       // extension to be dropped altogether later and do not simplify.
1556       if (!AlreadySignExtended) {
1557         // Compute the correct shift amount type, which must be getShiftAmountTy
1558         // for scalar types after legalization.
1559         EVT ShiftAmtTy = VT;
1560         if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
1561           ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
1562 
1563         SDValue ShiftAmt =
1564             TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
1565         return TLO.CombineTo(Op,
1566                              TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
1567       }
1568     }
1569 
1570     // If none of the extended bits are demanded, eliminate the sextinreg.
1571     if (DemandedBits.getActiveBits() <= ExVTBits)
1572       return TLO.CombineTo(Op, Op0);
1573 
1574     APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
1575 
1576     // Since the sign extended bits are demanded, we know that the sign
1577     // bit is demanded.
1578     InputDemandedBits.setBit(ExVTBits - 1);
1579 
1580     if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
1581       return true;
1582     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1583 
1584     // If the sign bit of the input is known set or clear, then we know the
1585     // top bits of the result.
1586 
1587     // If the input sign bit is known zero, convert this into a zero extension.
1588     if (Known.Zero[ExVTBits - 1])
1589       return TLO.CombineTo(
1590           Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
1591 
1592     APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
1593     if (Known.One[ExVTBits - 1]) { // Input sign bit known set
1594       Known.One.setBitsFrom(ExVTBits);
1595       Known.Zero &= Mask;
1596     } else { // Input sign bit unknown
1597       Known.Zero &= Mask;
1598       Known.One &= Mask;
1599     }
1600     break;
1601   }
1602   case ISD::BUILD_PAIR: {
1603     EVT HalfVT = Op.getOperand(0).getValueType();
1604     unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
1605 
1606     APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
1607     APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
1608 
1609     KnownBits KnownLo, KnownHi;
1610 
1611     if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
1612       return true;
1613 
1614     if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
1615       return true;
1616 
1617     Known.Zero = KnownLo.Zero.zext(BitWidth) |
1618                  KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
1619 
1620     Known.One = KnownLo.One.zext(BitWidth) |
1621                 KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
1622     break;
1623   }
1624   case ISD::ZERO_EXTEND:
1625   case ISD::ZERO_EXTEND_VECTOR_INREG: {
1626     SDValue Src = Op.getOperand(0);
1627     EVT SrcVT = Src.getValueType();
1628     unsigned InBits = SrcVT.getScalarSizeInBits();
1629     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1630     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
1631 
1632     // If none of the top bits are demanded, convert this into an any_extend.
1633     if (DemandedBits.getActiveBits() <= InBits) {
1634       // If we only need the non-extended bits of the bottom element
1635       // then we can just bitcast to the result.
1636       if (IsVecInReg && DemandedElts == 1 &&
1637           VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1638           TLO.DAG.getDataLayout().isLittleEndian())
1639         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1640 
1641       unsigned Opc =
1642           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1643       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1644         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1645     }
1646 
1647     APInt InDemandedBits = DemandedBits.trunc(InBits);
1648     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1649     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1650                              Depth + 1))
1651       return true;
1652     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1653     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1654     Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
1655     break;
1656   }
1657   case ISD::SIGN_EXTEND:
1658   case ISD::SIGN_EXTEND_VECTOR_INREG: {
1659     SDValue Src = Op.getOperand(0);
1660     EVT SrcVT = Src.getValueType();
1661     unsigned InBits = SrcVT.getScalarSizeInBits();
1662     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1663     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
1664 
1665     // If none of the top bits are demanded, convert this into an any_extend.
1666     if (DemandedBits.getActiveBits() <= InBits) {
1667       // If we only need the non-extended bits of the bottom element
1668       // then we can just bitcast to the result.
1669       if (IsVecInReg && DemandedElts == 1 &&
1670           VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1671           TLO.DAG.getDataLayout().isLittleEndian())
1672         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1673 
1674       unsigned Opc =
1675           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1676       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1677         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1678     }
1679 
1680     APInt InDemandedBits = DemandedBits.trunc(InBits);
1681     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1682 
1683     // Since some of the sign extended bits are demanded, we know that the sign
1684     // bit is demanded.
1685     InDemandedBits.setBit(InBits - 1);
1686 
1687     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1688                              Depth + 1))
1689       return true;
1690     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1691     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1692 
1693     // If the sign bit is known one, the top bits match.
1694     Known = Known.sext(BitWidth);
1695 
1696     // If the sign bit is known zero, convert this to a zero extend.
1697     if (Known.isNonNegative()) {
1698       unsigned Opc =
1699           IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
1700       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1701         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1702     }
1703     break;
1704   }
1705   case ISD::ANY_EXTEND:
1706   case ISD::ANY_EXTEND_VECTOR_INREG: {
1707     SDValue Src = Op.getOperand(0);
1708     EVT SrcVT = Src.getValueType();
1709     unsigned InBits = SrcVT.getScalarSizeInBits();
1710     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1711     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
1712 
1713     // If we only need the bottom element then we can just bitcast.
1714     // TODO: Handle ANY_EXTEND?
1715     if (IsVecInReg && DemandedElts == 1 &&
1716         VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1717         TLO.DAG.getDataLayout().isLittleEndian())
1718       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1719 
1720     APInt InDemandedBits = DemandedBits.trunc(InBits);
1721     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1722     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1723                              Depth + 1))
1724       return true;
1725     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1726     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1727     Known = Known.zext(BitWidth, false /* => any extend */);
1728     break;
1729   }
1730   case ISD::TRUNCATE: {
1731     SDValue Src = Op.getOperand(0);
1732 
1733     // Simplify the input, using demanded bit information, and compute the known
1734     // zero/one bits live out.
1735     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
1736     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
1737     if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
1738       return true;
1739     Known = Known.trunc(BitWidth);
1740 
1741     // Attempt to avoid multi-use ops if we don't need anything from them.
1742     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1743             Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
1744       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
1745 
1746     // If the input is only used by this truncate, see if we can shrink it based
1747     // on the known demanded bits.
1748     if (Src.getNode()->hasOneUse()) {
1749       switch (Src.getOpcode()) {
1750       default:
1751         break;
1752       case ISD::SRL:
1753         // Shrink SRL by a constant if none of the high bits shifted in are
1754         // demanded.
1755         if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
1756           // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
1757           // undesirable.
1758           break;
1759 
1760         auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1761         if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth))
1762           break;
1763 
1764         SDValue Shift = Src.getOperand(1);
1765         uint64_t ShVal = ShAmt->getZExtValue();
1766 
1767         if (TLO.LegalTypes())
1768           Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
1769 
1770         APInt HighBits =
1771             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
1772         HighBits.lshrInPlace(ShVal);
1773         HighBits = HighBits.trunc(BitWidth);
1774 
1775         if (!(HighBits & DemandedBits)) {
1776           // None of the shifted in bits are needed.  Add a truncate of the
1777           // shift input, then shift it.
1778           SDValue NewTrunc =
1779               TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
1780           return TLO.CombineTo(
1781               Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
1782         }
1783         break;
1784       }
1785     }
1786 
1787     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1788     break;
1789   }
1790   case ISD::AssertZext: {
1791     // AssertZext demands all of the high bits, plus any of the low bits
1792     // demanded by its users.
1793     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1794     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
1795     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
1796                              TLO, Depth + 1))
1797       return true;
1798     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1799 
1800     Known.Zero |= ~InMask;
1801     break;
1802   }
1803   case ISD::EXTRACT_VECTOR_ELT: {
1804     SDValue Src = Op.getOperand(0);
1805     SDValue Idx = Op.getOperand(1);
1806     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1807     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
1808 
1809     // Demand the bits from every vector element without a constant index.
1810     APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
1811     if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
1812       if (CIdx->getAPIntValue().ult(NumSrcElts))
1813         DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
1814 
1815     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
1816     // anything about the extended bits.
1817     APInt DemandedSrcBits = DemandedBits;
1818     if (BitWidth > EltBitWidth)
1819       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
1820 
1821     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
1822                              Depth + 1))
1823       return true;
1824 
1825     Known = Known2;
1826     if (BitWidth > EltBitWidth)
1827       Known = Known.zext(BitWidth, false /* => any extend */);
1828     break;
1829   }
1830   case ISD::BITCAST: {
1831     SDValue Src = Op.getOperand(0);
1832     EVT SrcVT = Src.getValueType();
1833     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
1834 
1835     // If this is an FP->Int bitcast and if the sign bit is the only
1836     // thing demanded, turn this into a FGETSIGN.
1837     if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
1838         DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
1839         SrcVT.isFloatingPoint()) {
1840       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
1841       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
1842       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
1843           SrcVT != MVT::f128) {
1844         // Cannot eliminate/lower SHL for f128 yet.
1845         EVT Ty = OpVTLegal ? VT : MVT::i32;
1846         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
1847         // place.  We expect the SHL to be eliminated by other optimizations.
1848         SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
1849         unsigned OpVTSizeInBits = Op.getValueSizeInBits();
1850         if (!OpVTLegal && OpVTSizeInBits > 32)
1851           Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
1852         unsigned ShVal = Op.getValueSizeInBits() - 1;
1853         SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
1854         return TLO.CombineTo(Op,
1855                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
1856       }
1857     }
1858 
1859     // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
1860     // Demand the elt/bit if any of the original elts/bits are demanded.
1861     // TODO - bigendian once we have test coverage.
1862     if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
1863         TLO.DAG.getDataLayout().isLittleEndian()) {
1864       unsigned Scale = BitWidth / NumSrcEltBits;
1865       unsigned NumSrcElts = SrcVT.getVectorNumElements();
1866       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1867       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
1868       for (unsigned i = 0; i != Scale; ++i) {
1869         unsigned Offset = i * NumSrcEltBits;
1870         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
1871         if (!Sub.isNullValue()) {
1872           DemandedSrcBits |= Sub;
1873           for (unsigned j = 0; j != NumElts; ++j)
1874             if (DemandedElts[j])
1875               DemandedSrcElts.setBit((j * Scale) + i);
1876         }
1877       }
1878 
1879       APInt KnownSrcUndef, KnownSrcZero;
1880       if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1881                                      KnownSrcZero, TLO, Depth + 1))
1882         return true;
1883 
1884       KnownBits KnownSrcBits;
1885       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1886                                KnownSrcBits, TLO, Depth + 1))
1887         return true;
1888     } else if ((NumSrcEltBits % BitWidth) == 0 &&
1889                TLO.DAG.getDataLayout().isLittleEndian()) {
1890       unsigned Scale = NumSrcEltBits / BitWidth;
1891       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1892       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1893       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
1894       for (unsigned i = 0; i != NumElts; ++i)
1895         if (DemandedElts[i]) {
1896           unsigned Offset = (i % Scale) * BitWidth;
1897           DemandedSrcBits.insertBits(DemandedBits, Offset);
1898           DemandedSrcElts.setBit(i / Scale);
1899         }
1900 
1901       if (SrcVT.isVector()) {
1902         APInt KnownSrcUndef, KnownSrcZero;
1903         if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1904                                        KnownSrcZero, TLO, Depth + 1))
1905           return true;
1906       }
1907 
1908       KnownBits KnownSrcBits;
1909       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1910                                KnownSrcBits, TLO, Depth + 1))
1911         return true;
1912     }
1913 
1914     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
1915     // recursive call where Known may be useful to the caller.
1916     if (Depth > 0) {
1917       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1918       return false;
1919     }
1920     break;
1921   }
1922   case ISD::ADD:
1923   case ISD::MUL:
1924   case ISD::SUB: {
1925     // Add, Sub, and Mul don't demand any bits in positions beyond that
1926     // of the highest bit demanded of them.
1927     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
1928     SDNodeFlags Flags = Op.getNode()->getFlags();
1929     unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
1930     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
1931     if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
1932                              Depth + 1) ||
1933         SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
1934                              Depth + 1) ||
1935         // See if the operation should be performed at a smaller bit width.
1936         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
1937       if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1938         // Disable the nsw and nuw flags. We can no longer guarantee that we
1939         // won't wrap after simplification.
1940         Flags.setNoSignedWrap(false);
1941         Flags.setNoUnsignedWrap(false);
1942         SDValue NewOp =
1943             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
1944         return TLO.CombineTo(Op, NewOp);
1945       }
1946       return true;
1947     }
1948 
1949     // Attempt to avoid multi-use ops if we don't need anything from them.
1950     if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1951       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1952           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
1953       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1954           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
1955       if (DemandedOp0 || DemandedOp1) {
1956         Flags.setNoSignedWrap(false);
1957         Flags.setNoUnsignedWrap(false);
1958         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1959         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1960         SDValue NewOp =
1961             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
1962         return TLO.CombineTo(Op, NewOp);
1963       }
1964     }
1965 
1966     // If we have a constant operand, we may be able to turn it into -1 if we
1967     // do not demand the high bits. This can make the constant smaller to
1968     // encode, allow more general folding, or match specialized instruction
1969     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
1970     // is probably not useful (and could be detrimental).
1971     ConstantSDNode *C = isConstOrConstSplat(Op1);
1972     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
1973     if (C && !C->isAllOnesValue() && !C->isOne() &&
1974         (C->getAPIntValue() | HighMask).isAllOnesValue()) {
1975       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
1976       // Disable the nsw and nuw flags. We can no longer guarantee that we
1977       // won't wrap after simplification.
1978       Flags.setNoSignedWrap(false);
1979       Flags.setNoUnsignedWrap(false);
1980       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
1981       return TLO.CombineTo(Op, NewOp);
1982     }
1983 
1984     LLVM_FALLTHROUGH;
1985   }
1986   default:
1987     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
1988       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
1989                                             Known, TLO, Depth))
1990         return true;
1991       break;
1992     }
1993 
1994     // Just use computeKnownBits to compute output bits.
1995     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1996     break;
1997   }
1998 
1999   // If we know the value of all of the demanded bits, return this as a
2000   // constant.
2001   if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2002     // Avoid folding to a constant if any OpaqueConstant is involved.
2003     const SDNode *N = Op.getNode();
2004     for (SDNodeIterator I = SDNodeIterator::begin(N),
2005                         E = SDNodeIterator::end(N);
2006          I != E; ++I) {
2007       SDNode *Op = *I;
2008       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2009         if (C->isOpaque())
2010           return false;
2011     }
2012     // TODO: Handle float bits as well.
2013     if (VT.isInteger())
2014       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2015   }
2016 
2017   return false;
2018 }
2019 
2020 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2021                                                 const APInt &DemandedElts,
2022                                                 APInt &KnownUndef,
2023                                                 APInt &KnownZero,
2024                                                 DAGCombinerInfo &DCI) const {
2025   SelectionDAG &DAG = DCI.DAG;
2026   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2027                         !DCI.isBeforeLegalizeOps());
2028 
2029   bool Simplified =
2030       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2031   if (Simplified) {
2032     DCI.AddToWorklist(Op.getNode());
2033     DCI.CommitTargetLoweringOpt(TLO);
2034   }
2035 
2036   return Simplified;
2037 }
2038 
2039 /// Given a vector binary operation and known undefined elements for each input
2040 /// operand, compute whether each element of the output is undefined.
2041 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2042                                          const APInt &UndefOp0,
2043                                          const APInt &UndefOp1) {
2044   EVT VT = BO.getValueType();
2045   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2046          "Vector binop only");
2047 
2048   EVT EltVT = VT.getVectorElementType();
2049   unsigned NumElts = VT.getVectorNumElements();
2050   assert(UndefOp0.getBitWidth() == NumElts &&
2051          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2052 
2053   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2054                                    const APInt &UndefVals) {
2055     if (UndefVals[Index])
2056       return DAG.getUNDEF(EltVT);
2057 
2058     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2059       // Try hard to make sure that the getNode() call is not creating temporary
2060       // nodes. Ignore opaque integers because they do not constant fold.
2061       SDValue Elt = BV->getOperand(Index);
2062       auto *C = dyn_cast<ConstantSDNode>(Elt);
2063       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2064         return Elt;
2065     }
2066 
2067     return SDValue();
2068   };
2069 
2070   APInt KnownUndef = APInt::getNullValue(NumElts);
2071   for (unsigned i = 0; i != NumElts; ++i) {
2072     // If both inputs for this element are either constant or undef and match
2073     // the element type, compute the constant/undef result for this element of
2074     // the vector.
2075     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2076     // not handle FP constants. The code within getNode() should be refactored
2077     // to avoid the danger of creating a bogus temporary node here.
2078     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2079     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2080     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2081       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2082         KnownUndef.setBit(i);
2083   }
2084   return KnownUndef;
2085 }
2086 
2087 bool TargetLowering::SimplifyDemandedVectorElts(
2088     SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2089     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2090     bool AssumeSingleUse) const {
2091   EVT VT = Op.getValueType();
2092   APInt DemandedElts = OriginalDemandedElts;
2093   unsigned NumElts = DemandedElts.getBitWidth();
2094   assert(VT.isVector() && "Expected vector op");
2095   assert(VT.getVectorNumElements() == NumElts &&
2096          "Mask size mismatches value type element count!");
2097 
2098   KnownUndef = KnownZero = APInt::getNullValue(NumElts);
2099 
2100   // Undef operand.
2101   if (Op.isUndef()) {
2102     KnownUndef.setAllBits();
2103     return false;
2104   }
2105 
2106   // If Op has other users, assume that all elements are needed.
2107   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
2108     DemandedElts.setAllBits();
2109 
2110   // Not demanding any elements from Op.
2111   if (DemandedElts == 0) {
2112     KnownUndef.setAllBits();
2113     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2114   }
2115 
2116   // Limit search depth.
2117   if (Depth >= SelectionDAG::MaxRecursionDepth)
2118     return false;
2119 
2120   SDLoc DL(Op);
2121   unsigned EltSizeInBits = VT.getScalarSizeInBits();
2122 
2123   switch (Op.getOpcode()) {
2124   case ISD::SCALAR_TO_VECTOR: {
2125     if (!DemandedElts[0]) {
2126       KnownUndef.setAllBits();
2127       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2128     }
2129     KnownUndef.setHighBits(NumElts - 1);
2130     break;
2131   }
2132   case ISD::BITCAST: {
2133     SDValue Src = Op.getOperand(0);
2134     EVT SrcVT = Src.getValueType();
2135 
2136     // We only handle vectors here.
2137     // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2138     if (!SrcVT.isVector())
2139       break;
2140 
2141     // Fast handling of 'identity' bitcasts.
2142     unsigned NumSrcElts = SrcVT.getVectorNumElements();
2143     if (NumSrcElts == NumElts)
2144       return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2145                                         KnownZero, TLO, Depth + 1);
2146 
2147     APInt SrcZero, SrcUndef;
2148     APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
2149 
2150     // Bitcast from 'large element' src vector to 'small element' vector, we
2151     // must demand a source element if any DemandedElt maps to it.
2152     if ((NumElts % NumSrcElts) == 0) {
2153       unsigned Scale = NumElts / NumSrcElts;
2154       for (unsigned i = 0; i != NumElts; ++i)
2155         if (DemandedElts[i])
2156           SrcDemandedElts.setBit(i / Scale);
2157 
2158       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2159                                      TLO, Depth + 1))
2160         return true;
2161 
2162       // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2163       // of the large element.
2164       // TODO - bigendian once we have test coverage.
2165       if (TLO.DAG.getDataLayout().isLittleEndian()) {
2166         unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2167         APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
2168         for (unsigned i = 0; i != NumElts; ++i)
2169           if (DemandedElts[i]) {
2170             unsigned Ofs = (i % Scale) * EltSizeInBits;
2171             SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2172           }
2173 
2174         KnownBits Known;
2175         if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
2176           return true;
2177       }
2178 
2179       // If the src element is zero/undef then all the output elements will be -
2180       // only demanded elements are guaranteed to be correct.
2181       for (unsigned i = 0; i != NumSrcElts; ++i) {
2182         if (SrcDemandedElts[i]) {
2183           if (SrcZero[i])
2184             KnownZero.setBits(i * Scale, (i + 1) * Scale);
2185           if (SrcUndef[i])
2186             KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2187         }
2188       }
2189     }
2190 
2191     // Bitcast from 'small element' src vector to 'large element' vector, we
2192     // demand all smaller source elements covered by the larger demanded element
2193     // of this vector.
2194     if ((NumSrcElts % NumElts) == 0) {
2195       unsigned Scale = NumSrcElts / NumElts;
2196       for (unsigned i = 0; i != NumElts; ++i)
2197         if (DemandedElts[i])
2198           SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
2199 
2200       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2201                                      TLO, Depth + 1))
2202         return true;
2203 
2204       // If all the src elements covering an output element are zero/undef, then
2205       // the output element will be as well, assuming it was demanded.
2206       for (unsigned i = 0; i != NumElts; ++i) {
2207         if (DemandedElts[i]) {
2208           if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
2209             KnownZero.setBit(i);
2210           if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
2211             KnownUndef.setBit(i);
2212         }
2213       }
2214     }
2215     break;
2216   }
2217   case ISD::BUILD_VECTOR: {
2218     // Check all elements and simplify any unused elements with UNDEF.
2219     if (!DemandedElts.isAllOnesValue()) {
2220       // Don't simplify BROADCASTS.
2221       if (llvm::any_of(Op->op_values(),
2222                        [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2223         SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2224         bool Updated = false;
2225         for (unsigned i = 0; i != NumElts; ++i) {
2226           if (!DemandedElts[i] && !Ops[i].isUndef()) {
2227             Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2228             KnownUndef.setBit(i);
2229             Updated = true;
2230           }
2231         }
2232         if (Updated)
2233           return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
2234       }
2235     }
2236     for (unsigned i = 0; i != NumElts; ++i) {
2237       SDValue SrcOp = Op.getOperand(i);
2238       if (SrcOp.isUndef()) {
2239         KnownUndef.setBit(i);
2240       } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
2241                  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
2242         KnownZero.setBit(i);
2243       }
2244     }
2245     break;
2246   }
2247   case ISD::CONCAT_VECTORS: {
2248     EVT SubVT = Op.getOperand(0).getValueType();
2249     unsigned NumSubVecs = Op.getNumOperands();
2250     unsigned NumSubElts = SubVT.getVectorNumElements();
2251     for (unsigned i = 0; i != NumSubVecs; ++i) {
2252       SDValue SubOp = Op.getOperand(i);
2253       APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
2254       APInt SubUndef, SubZero;
2255       if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
2256                                      Depth + 1))
2257         return true;
2258       KnownUndef.insertBits(SubUndef, i * NumSubElts);
2259       KnownZero.insertBits(SubZero, i * NumSubElts);
2260     }
2261     break;
2262   }
2263   case ISD::INSERT_SUBVECTOR: {
2264     if (!isa<ConstantSDNode>(Op.getOperand(2)))
2265       break;
2266     SDValue Base = Op.getOperand(0);
2267     SDValue Sub = Op.getOperand(1);
2268     EVT SubVT = Sub.getValueType();
2269     unsigned NumSubElts = SubVT.getVectorNumElements();
2270     const APInt &Idx = Op.getConstantOperandAPInt(2);
2271     if (Idx.ugt(NumElts - NumSubElts))
2272       break;
2273     unsigned SubIdx = Idx.getZExtValue();
2274     APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
2275     APInt SubUndef, SubZero;
2276     if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
2277                                    Depth + 1))
2278       return true;
2279     APInt BaseElts = DemandedElts;
2280     BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
2281 
2282     // If none of the base operand elements are demanded, replace it with undef.
2283     if (!BaseElts && !Base.isUndef())
2284       return TLO.CombineTo(Op,
2285                            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
2286                                            TLO.DAG.getUNDEF(VT),
2287                                            Op.getOperand(1),
2288                                            Op.getOperand(2)));
2289 
2290     if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
2291                                    Depth + 1))
2292       return true;
2293     KnownUndef.insertBits(SubUndef, SubIdx);
2294     KnownZero.insertBits(SubZero, SubIdx);
2295     break;
2296   }
2297   case ISD::EXTRACT_SUBVECTOR: {
2298     SDValue Src = Op.getOperand(0);
2299     ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2300     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2301     if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
2302       // Offset the demanded elts by the subvector index.
2303       uint64_t Idx = SubIdx->getZExtValue();
2304       APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
2305       APInt SrcUndef, SrcZero;
2306       if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
2307                                      Depth + 1))
2308         return true;
2309       KnownUndef = SrcUndef.extractBits(NumElts, Idx);
2310       KnownZero = SrcZero.extractBits(NumElts, Idx);
2311     }
2312     break;
2313   }
2314   case ISD::INSERT_VECTOR_ELT: {
2315     SDValue Vec = Op.getOperand(0);
2316     SDValue Scl = Op.getOperand(1);
2317     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2318 
2319     // For a legal, constant insertion index, if we don't need this insertion
2320     // then strip it, else remove it from the demanded elts.
2321     if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
2322       unsigned Idx = CIdx->getZExtValue();
2323       if (!DemandedElts[Idx])
2324         return TLO.CombineTo(Op, Vec);
2325 
2326       APInt DemandedVecElts(DemandedElts);
2327       DemandedVecElts.clearBit(Idx);
2328       if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
2329                                      KnownZero, TLO, Depth + 1))
2330         return true;
2331 
2332       KnownUndef.clearBit(Idx);
2333       if (Scl.isUndef())
2334         KnownUndef.setBit(Idx);
2335 
2336       KnownZero.clearBit(Idx);
2337       if (isNullConstant(Scl) || isNullFPConstant(Scl))
2338         KnownZero.setBit(Idx);
2339       break;
2340     }
2341 
2342     APInt VecUndef, VecZero;
2343     if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
2344                                    Depth + 1))
2345       return true;
2346     // Without knowing the insertion index we can't set KnownUndef/KnownZero.
2347     break;
2348   }
2349   case ISD::VSELECT: {
2350     // Try to transform the select condition based on the current demanded
2351     // elements.
2352     // TODO: If a condition element is undef, we can choose from one arm of the
2353     //       select (and if one arm is undef, then we can propagate that to the
2354     //       result).
2355     // TODO - add support for constant vselect masks (see IR version of this).
2356     APInt UnusedUndef, UnusedZero;
2357     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
2358                                    UnusedZero, TLO, Depth + 1))
2359       return true;
2360 
2361     // See if we can simplify either vselect operand.
2362     APInt DemandedLHS(DemandedElts);
2363     APInt DemandedRHS(DemandedElts);
2364     APInt UndefLHS, ZeroLHS;
2365     APInt UndefRHS, ZeroRHS;
2366     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
2367                                    ZeroLHS, TLO, Depth + 1))
2368       return true;
2369     if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
2370                                    ZeroRHS, TLO, Depth + 1))
2371       return true;
2372 
2373     KnownUndef = UndefLHS & UndefRHS;
2374     KnownZero = ZeroLHS & ZeroRHS;
2375     break;
2376   }
2377   case ISD::VECTOR_SHUFFLE: {
2378     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
2379 
2380     // Collect demanded elements from shuffle operands..
2381     APInt DemandedLHS(NumElts, 0);
2382     APInt DemandedRHS(NumElts, 0);
2383     for (unsigned i = 0; i != NumElts; ++i) {
2384       int M = ShuffleMask[i];
2385       if (M < 0 || !DemandedElts[i])
2386         continue;
2387       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
2388       if (M < (int)NumElts)
2389         DemandedLHS.setBit(M);
2390       else
2391         DemandedRHS.setBit(M - NumElts);
2392     }
2393 
2394     // See if we can simplify either shuffle operand.
2395     APInt UndefLHS, ZeroLHS;
2396     APInt UndefRHS, ZeroRHS;
2397     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
2398                                    ZeroLHS, TLO, Depth + 1))
2399       return true;
2400     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
2401                                    ZeroRHS, TLO, Depth + 1))
2402       return true;
2403 
2404     // Simplify mask using undef elements from LHS/RHS.
2405     bool Updated = false;
2406     bool IdentityLHS = true, IdentityRHS = true;
2407     SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
2408     for (unsigned i = 0; i != NumElts; ++i) {
2409       int &M = NewMask[i];
2410       if (M < 0)
2411         continue;
2412       if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
2413           (M >= (int)NumElts && UndefRHS[M - NumElts])) {
2414         Updated = true;
2415         M = -1;
2416       }
2417       IdentityLHS &= (M < 0) || (M == (int)i);
2418       IdentityRHS &= (M < 0) || ((M - NumElts) == i);
2419     }
2420 
2421     // Update legal shuffle masks based on demanded elements if it won't reduce
2422     // to Identity which can cause premature removal of the shuffle mask.
2423     if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
2424       SDValue LegalShuffle =
2425           buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
2426                                   NewMask, TLO.DAG);
2427       if (LegalShuffle)
2428         return TLO.CombineTo(Op, LegalShuffle);
2429     }
2430 
2431     // Propagate undef/zero elements from LHS/RHS.
2432     for (unsigned i = 0; i != NumElts; ++i) {
2433       int M = ShuffleMask[i];
2434       if (M < 0) {
2435         KnownUndef.setBit(i);
2436       } else if (M < (int)NumElts) {
2437         if (UndefLHS[M])
2438           KnownUndef.setBit(i);
2439         if (ZeroLHS[M])
2440           KnownZero.setBit(i);
2441       } else {
2442         if (UndefRHS[M - NumElts])
2443           KnownUndef.setBit(i);
2444         if (ZeroRHS[M - NumElts])
2445           KnownZero.setBit(i);
2446       }
2447     }
2448     break;
2449   }
2450   case ISD::ANY_EXTEND_VECTOR_INREG:
2451   case ISD::SIGN_EXTEND_VECTOR_INREG:
2452   case ISD::ZERO_EXTEND_VECTOR_INREG: {
2453     APInt SrcUndef, SrcZero;
2454     SDValue Src = Op.getOperand(0);
2455     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2456     APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
2457     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2458                                    Depth + 1))
2459       return true;
2460     KnownZero = SrcZero.zextOrTrunc(NumElts);
2461     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
2462 
2463     if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
2464         Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
2465         DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
2466       // aext - if we just need the bottom element then we can bitcast.
2467       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2468     }
2469 
2470     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
2471       // zext(undef) upper bits are guaranteed to be zero.
2472       if (DemandedElts.isSubsetOf(KnownUndef))
2473         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2474       KnownUndef.clearAllBits();
2475     }
2476     break;
2477   }
2478 
2479   // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
2480   // MAX, saturated math, etc.
2481   case ISD::OR:
2482   case ISD::XOR:
2483   case ISD::ADD:
2484   case ISD::SUB:
2485   case ISD::FADD:
2486   case ISD::FSUB:
2487   case ISD::FMUL:
2488   case ISD::FDIV:
2489   case ISD::FREM: {
2490     APInt UndefRHS, ZeroRHS;
2491     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
2492                                    ZeroRHS, TLO, Depth + 1))
2493       return true;
2494     APInt UndefLHS, ZeroLHS;
2495     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
2496                                    ZeroLHS, TLO, Depth + 1))
2497       return true;
2498 
2499     KnownZero = ZeroLHS & ZeroRHS;
2500     KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
2501     break;
2502   }
2503   case ISD::SHL:
2504   case ISD::SRL:
2505   case ISD::SRA:
2506   case ISD::ROTL:
2507   case ISD::ROTR: {
2508     APInt UndefRHS, ZeroRHS;
2509     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
2510                                    ZeroRHS, TLO, Depth + 1))
2511       return true;
2512     APInt UndefLHS, ZeroLHS;
2513     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
2514                                    ZeroLHS, TLO, Depth + 1))
2515       return true;
2516 
2517     KnownZero = ZeroLHS;
2518     KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
2519     break;
2520   }
2521   case ISD::MUL:
2522   case ISD::AND: {
2523     APInt SrcUndef, SrcZero;
2524     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
2525                                    SrcZero, TLO, Depth + 1))
2526       return true;
2527     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2528                                    KnownZero, TLO, Depth + 1))
2529       return true;
2530 
2531     // If either side has a zero element, then the result element is zero, even
2532     // if the other is an UNDEF.
2533     // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
2534     // and then handle 'and' nodes with the rest of the binop opcodes.
2535     KnownZero |= SrcZero;
2536     KnownUndef &= SrcUndef;
2537     KnownUndef &= ~KnownZero;
2538     break;
2539   }
2540   case ISD::TRUNCATE:
2541   case ISD::SIGN_EXTEND:
2542   case ISD::ZERO_EXTEND:
2543     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2544                                    KnownZero, TLO, Depth + 1))
2545       return true;
2546 
2547     if (Op.getOpcode() == ISD::ZERO_EXTEND) {
2548       // zext(undef) upper bits are guaranteed to be zero.
2549       if (DemandedElts.isSubsetOf(KnownUndef))
2550         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2551       KnownUndef.clearAllBits();
2552     }
2553     break;
2554   default: {
2555     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2556       if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
2557                                                   KnownZero, TLO, Depth))
2558         return true;
2559     } else {
2560       KnownBits Known;
2561       APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
2562       if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
2563                                TLO, Depth, AssumeSingleUse))
2564         return true;
2565     }
2566     break;
2567   }
2568   }
2569   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
2570 
2571   // Constant fold all undef cases.
2572   // TODO: Handle zero cases as well.
2573   if (DemandedElts.isSubsetOf(KnownUndef))
2574     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2575 
2576   return false;
2577 }
2578 
2579 /// Determine which of the bits specified in Mask are known to be either zero or
2580 /// one and return them in the Known.
2581 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2582                                                    KnownBits &Known,
2583                                                    const APInt &DemandedElts,
2584                                                    const SelectionDAG &DAG,
2585                                                    unsigned Depth) const {
2586   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2587           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2588           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2589           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2590          "Should use MaskedValueIsZero if you don't know whether Op"
2591          " is a target node!");
2592   Known.resetAll();
2593 }
2594 
// GlobalISel counterpart of computeKnownBitsForTargetNode: report known bits
// for register R, defined by a target-specific instruction, to GISelKnownBits.
// Default implementation: claim no knowledge (all bits unknown).
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  // Conservative default: no bit of R is known to be zero or one.
  Known.resetAll();
}
2601 
2602 void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
2603                                                    KnownBits &Known,
2604                                                    const APInt &DemandedElts,
2605                                                    const SelectionDAG &DAG,
2606                                                    unsigned Depth) const {
2607   assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
2608 
2609   if (unsigned Align = DAG.InferPtrAlignment(Op)) {
2610     // The low bits are known zero if the pointer is aligned.
2611     Known.Zero.setLowBits(Log2_32(Align));
2612   }
2613 }
2614 
2615 /// This method can be implemented by targets that want to expose additional
2616 /// information about sign bits to the DAG Combiner.
2617 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2618                                                          const APInt &,
2619                                                          const SelectionDAG &,
2620                                                          unsigned Depth) const {
2621   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2622           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2623           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2624           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2625          "Should use ComputeNumSignBits if you don't know whether Op"
2626          " is a target node!");
2627   return 1;
2628 }
2629 
2630 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
2631     SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
2632     TargetLoweringOpt &TLO, unsigned Depth) const {
2633   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2634           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2635           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2636           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2637          "Should use SimplifyDemandedVectorElts if you don't know whether Op"
2638          " is a target node!");
2639   return false;
2640 }
2641 
2642 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
2643     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2644     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
2645   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2646           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2647           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2648           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2649          "Should use SimplifyDemandedBits if you don't know whether Op"
2650          " is a target node!");
2651   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
2652   return false;
2653 }
2654 
2655 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
2656     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2657     SelectionDAG &DAG, unsigned Depth) const {
2658   assert(
2659       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2660        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2661        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2662        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2663       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
2664       " is a target node!");
2665   return SDValue();
2666 }
2667 
2668 SDValue
2669 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
2670                                         SDValue N1, MutableArrayRef<int> Mask,
2671                                         SelectionDAG &DAG) const {
2672   bool LegalMask = isShuffleMaskLegal(Mask, VT);
2673   if (!LegalMask) {
2674     std::swap(N0, N1);
2675     ShuffleVectorSDNode::commuteMask(Mask);
2676     LegalMask = isShuffleMaskLegal(Mask, VT);
2677   }
2678 
2679   if (!LegalMask)
2680     return SDValue();
2681 
2682   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
2683 }
2684 
// Hook for targets that can recover the IR Constant a target-specific load
// node reads from. Default: no constant can be recovered.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
2688 
2689 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
2690                                                   const SelectionDAG &DAG,
2691                                                   bool SNaN,
2692                                                   unsigned Depth) const {
2693   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2694           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2695           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2696           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2697          "Should use isKnownNeverNaN if you don't know whether Op"
2698          " is a target node!");
2699   return false;
2700 }
2701 
2702 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
2703 // work with truncating build vectors and vectors with elements of less than
2704 // 8 bits.
2705 bool TargetLowering::isConstTrueVal(const SDNode *N) const {
2706   if (!N)
2707     return false;
2708 
2709   APInt CVal;
2710   if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2711     CVal = CN->getAPIntValue();
2712   } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
2713     auto *CN = BV->getConstantSplatNode();
2714     if (!CN)
2715       return false;
2716 
2717     // If this is a truncating build vector, truncate the splat value.
2718     // Otherwise, we may fail to match the expected values below.
2719     unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
2720     CVal = CN->getAPIntValue();
2721     if (BVEltWidth < CVal.getBitWidth())
2722       CVal = CVal.trunc(BVEltWidth);
2723   } else {
2724     return false;
2725   }
2726 
2727   switch (getBooleanContents(N->getValueType(0))) {
2728   case UndefinedBooleanContent:
2729     return CVal[0];
2730   case ZeroOrOneBooleanContent:
2731     return CVal.isOneValue();
2732   case ZeroOrNegativeOneBooleanContent:
2733     return CVal.isAllOnesValue();
2734   }
2735 
2736   llvm_unreachable("Invalid boolean contents");
2737 }
2738 
2739 bool TargetLowering::isConstFalseVal(const SDNode *N) const {
2740   if (!N)
2741     return false;
2742 
2743   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
2744   if (!CN) {
2745     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
2746     if (!BV)
2747       return false;
2748 
2749     // Only interested in constant splats, we don't care about undef
2750     // elements in identifying boolean constants and getConstantSplatNode
2751     // returns NULL if all ops are undef;
2752     CN = BV->getConstantSplatNode();
2753     if (!CN)
2754       return false;
2755   }
2756 
2757   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
2758     return !CN->getAPIntValue()[0];
2759 
2760   return CN->isNullValue();
2761 }
2762 
2763 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
2764                                        bool SExt) const {
2765   if (VT == MVT::i1)
2766     return N->isOne();
2767 
2768   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
2769   switch (Cnt) {
2770   case TargetLowering::ZeroOrOneBooleanContent:
2771     // An extended value of 1 is always true, unless its original type is i1,
2772     // in which case it will be sign extended to -1.
2773     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
2774   case TargetLowering::UndefinedBooleanContent:
2775   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2776     return N->isAllOnesValue() && SExt;
2777   }
2778   llvm_unreachable("Unexpected enumeration.");
2779 }
2780 
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
///
/// Recognized patterns (in either operand order, eq/ne only):
///   (X & Y) ==/!= Y --> (X & Y) !=/== 0   when Y is known power-of-two
///   (X & Y) ==/!= Y --> (~X & Y) ==/!= 0  when the target has and-not
///
/// \returns the replacement setcc node, or SDValue() when no fold applies.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  // Canonicalize so that the AND (if present) sits in N0.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  EVT OpVT = N0.getValueType();
  // Only integer equality/inequality compares of an AND are handled.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // Identify which AND operand is the repeated value Y; the other one is X.
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  SelectionDAG &DAG = DCI.DAG;
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    auto *YConst = dyn_cast<ConstantSDNode>(Y);
    if (YConst && YConst->isNullValue())
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
2840 
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned comparison onto eq/ne, canonicalizing the setcc constant
  // (ule/ugt compare against 2^KeptBits - 1, so bump by one to get 2^KeptBits).
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  // NOTE: captures I1/I01 by reference — re-evaluated after they are negated
  // below.
  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  // The add constant must be exactly half the setcc constant, i.e.
  // 1 << (KeptBits-1) vs 1 << KeptBits, for this to be the truncation check.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into:  ((%x << C) a>> C) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);

  return T2;
}
2937 
2938 // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
2939 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
2940     EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
2941     DAGCombinerInfo &DCI, const SDLoc &DL) const {
2942   assert(isConstOrConstSplat(N1C) &&
2943          isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
2944          "Should be a comparison with 0.");
2945   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2946          "Valid only for [in]equality comparisons.");
2947 
2948   unsigned NewShiftOpcode;
2949   SDValue X, C, Y;
2950 
2951   SelectionDAG &DAG = DCI.DAG;
2952   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2953 
2954   // Look for '(C l>>/<< Y)'.
2955   auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
2956     // The shift should be one-use.
2957     if (!V.hasOneUse())
2958       return false;
2959     unsigned OldShiftOpcode = V.getOpcode();
2960     switch (OldShiftOpcode) {
2961     case ISD::SHL:
2962       NewShiftOpcode = ISD::SRL;
2963       break;
2964     case ISD::SRL:
2965       NewShiftOpcode = ISD::SHL;
2966       break;
2967     default:
2968       return false; // must be a logical shift.
2969     }
2970     // We should be shifting a constant.
2971     // FIXME: best to use isConstantOrConstantVector().
2972     C = V.getOperand(0);
2973     ConstantSDNode *CC =
2974         isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
2975     if (!CC)
2976       return false;
2977     Y = V.getOperand(1);
2978 
2979     ConstantSDNode *XC =
2980         isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
2981     return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
2982         X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
2983   };
2984 
2985   // LHS of comparison should be an one-use 'and'.
2986   if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
2987     return SDValue();
2988 
2989   X = N0.getOperand(0);
2990   SDValue Mask = N0.getOperand(1);
2991 
2992   // 'and' is commutative!
2993   if (!Match(Mask)) {
2994     std::swap(X, Mask);
2995     if (!Match(Mask))
2996       return SDValue();
2997   }
2998 
2999   EVT VT = X.getValueType();
3000 
3001   // Produce:
3002   // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3003   SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3004   SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3005   SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3006   return T2;
3007 }
3008 
3009 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3010 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3011 /// handle the commuted versions of these patterns.
3012 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3013                                            ISD::CondCode Cond, const SDLoc &DL,
3014                                            DAGCombinerInfo &DCI) const {
3015   unsigned BOpcode = N0.getOpcode();
3016   assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3017          "Unexpected binop");
3018   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3019 
3020   // (X + Y) == X --> Y == 0
3021   // (X - Y) == X --> Y == 0
3022   // (X ^ Y) == X --> Y == 0
3023   SelectionDAG &DAG = DCI.DAG;
3024   EVT OpVT = N0.getValueType();
3025   SDValue X = N0.getOperand(0);
3026   SDValue Y = N0.getOperand(1);
3027   if (X == N1)
3028     return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3029 
3030   if (Y != N1)
3031     return SDValue();
3032 
3033   // (X + Y) == Y --> X == 0
3034   // (X ^ Y) == Y --> X == 0
3035   if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3036     return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3037 
3038   // The shift would not be valid if the operands are boolean (i1).
3039   if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3040     return SDValue();
3041 
3042   // (X - Y) == Y --> X == Y << 1
3043   EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3044                                  !DCI.isBeforeLegalize());
3045   SDValue One = DAG.getConstant(1, DL, ShiftVT);
3046   SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3047   if (!DCI.isCalledByLegalizer())
3048     DCI.AddToWorklist(YShl1.getNode());
3049   return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3050 }
3051 
3052 /// Try to simplify a setcc built with the specified operands and cc. If it is
3053 /// unable to simplify it, return a null SDValue.
3054 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
3055                                       ISD::CondCode Cond, bool foldBooleans,
3056                                       DAGCombinerInfo &DCI,
3057                                       const SDLoc &dl) const {
3058   SelectionDAG &DAG = DCI.DAG;
3059   const DataLayout &Layout = DAG.getDataLayout();
3060   EVT OpVT = N0.getValueType();
3061 
3062   // Constant fold or commute setcc.
3063   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
3064     return Fold;
3065 
3066   // Ensure that the constant occurs on the RHS and fold constant comparisons.
3067   // TODO: Handle non-splat vector constants. All undef causes trouble.
3068   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
3069   if (isConstOrConstSplat(N0) &&
3070       (DCI.isBeforeLegalizeOps() ||
3071        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
3072     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3073 
3074   // If we have a subtract with the same 2 non-constant operands as this setcc
3075   // -- but in reverse order -- then try to commute the operands of this setcc
3076   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
3077   // instruction on some targets.
3078   if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
3079       (DCI.isBeforeLegalizeOps() ||
3080        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
3081       DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
3082       !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
3083     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3084 
3085   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3086     const APInt &C1 = N1C->getAPIntValue();
3087 
3088     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
3089     // equality comparison, then we're just comparing whether X itself is
3090     // zero.
3091     if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
3092         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
3093         N0.getOperand(1).getOpcode() == ISD::Constant) {
3094       const APInt &ShAmt = N0.getConstantOperandAPInt(1);
3095       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3096           ShAmt == Log2_32(N0.getValueSizeInBits())) {
3097         if ((C1 == 0) == (Cond == ISD::SETEQ)) {
3098           // (srl (ctlz x), 5) == 0  -> X != 0
3099           // (srl (ctlz x), 5) != 1  -> X != 0
3100           Cond = ISD::SETNE;
3101         } else {
3102           // (srl (ctlz x), 5) != 0  -> X == 0
3103           // (srl (ctlz x), 5) == 1  -> X == 0
3104           Cond = ISD::SETEQ;
3105         }
3106         SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
3107         return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
3108                             Zero, Cond);
3109       }
3110     }
3111 
3112     SDValue CTPOP = N0;
3113     // Look through truncs that don't change the value of a ctpop.
3114     if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
3115       CTPOP = N0.getOperand(0);
3116 
3117     if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
3118         (N0 == CTPOP ||
3119          N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
3120       EVT CTVT = CTPOP.getValueType();
3121       SDValue CTOp = CTPOP.getOperand(0);
3122 
3123       // (ctpop x) u< 2 -> (x & x-1) == 0
3124       // (ctpop x) u> 1 -> (x & x-1) != 0
3125       if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
3126         SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3127         SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3128         SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3129         ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
3130         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
3131       }
3132 
3133       // If ctpop is not supported, expand a power-of-2 comparison based on it.
3134       if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
3135           (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3136         // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
3137         // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
3138         SDValue Zero = DAG.getConstant(0, dl, CTVT);
3139         SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3140         ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true);
3141         SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3142         SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3143         SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
3144         SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
3145         unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
3146         return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
3147       }
3148     }
3149 
3150     // (zext x) == C --> x == (trunc C)
3151     // (sext x) == C --> x == (trunc C)
3152     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3153         DCI.isBeforeLegalize() && N0->hasOneUse()) {
3154       unsigned MinBits = N0.getValueSizeInBits();
3155       SDValue PreExt;
3156       bool Signed = false;
3157       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
3158         // ZExt
3159         MinBits = N0->getOperand(0).getValueSizeInBits();
3160         PreExt = N0->getOperand(0);
3161       } else if (N0->getOpcode() == ISD::AND) {
3162         // DAGCombine turns costly ZExts into ANDs
3163         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
3164           if ((C->getAPIntValue()+1).isPowerOf2()) {
3165             MinBits = C->getAPIntValue().countTrailingOnes();
3166             PreExt = N0->getOperand(0);
3167           }
3168       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
3169         // SExt
3170         MinBits = N0->getOperand(0).getValueSizeInBits();
3171         PreExt = N0->getOperand(0);
3172         Signed = true;
3173       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
3174         // ZEXTLOAD / SEXTLOAD
3175         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
3176           MinBits = LN0->getMemoryVT().getSizeInBits();
3177           PreExt = N0;
3178         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
3179           Signed = true;
3180           MinBits = LN0->getMemoryVT().getSizeInBits();
3181           PreExt = N0;
3182         }
3183       }
3184 
3185       // Figure out how many bits we need to preserve this constant.
3186       unsigned ReqdBits = Signed ?
3187         C1.getBitWidth() - C1.getNumSignBits() + 1 :
3188         C1.getActiveBits();
3189 
3190       // Make sure we're not losing bits from the constant.
3191       if (MinBits > 0 &&
3192           MinBits < C1.getBitWidth() &&
3193           MinBits >= ReqdBits) {
3194         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
3195         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
3196           // Will get folded away.
3197           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
3198           if (MinBits == 1 && C1 == 1)
3199             // Invert the condition.
3200             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
3201                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3202           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
3203           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
3204         }
3205 
3206         // If truncating the setcc operands is not desirable, we can still
3207         // simplify the expression in some cases:
3208         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
3209         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
3210         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
3211         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
3212         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
3213         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
3214         SDValue TopSetCC = N0->getOperand(0);
3215         unsigned N0Opc = N0->getOpcode();
3216         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
3217         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
3218             TopSetCC.getOpcode() == ISD::SETCC &&
3219             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
3220             (isConstFalseVal(N1C) ||
3221              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
3222 
3223           bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
3224                          (!N1C->isNullValue() && Cond == ISD::SETNE);
3225 
3226           if (!Inverse)
3227             return TopSetCC;
3228 
3229           ISD::CondCode InvCond = ISD::getSetCCInverse(
3230               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
3231               TopSetCC.getOperand(0).getValueType().isInteger());
3232           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
3233                                       TopSetCC.getOperand(1),
3234                                       InvCond);
3235         }
3236       }
3237     }
3238 
3239     // If the LHS is '(and load, const)', the RHS is 0, the test is for
3240     // equality or unsigned, and all 1 bits of the const are in the same
3241     // partial word, see if we can shorten the load.
3242     if (DCI.isBeforeLegalize() &&
3243         !ISD::isSignedIntSetCC(Cond) &&
3244         N0.getOpcode() == ISD::AND && C1 == 0 &&
3245         N0.getNode()->hasOneUse() &&
3246         isa<LoadSDNode>(N0.getOperand(0)) &&
3247         N0.getOperand(0).getNode()->hasOneUse() &&
3248         isa<ConstantSDNode>(N0.getOperand(1))) {
3249       LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
3250       APInt bestMask;
3251       unsigned bestWidth = 0, bestOffset = 0;
3252       if (Lod->isSimple() && Lod->isUnindexed()) {
3253         unsigned origWidth = N0.getValueSizeInBits();
3254         unsigned maskWidth = origWidth;
3255         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
3256         // 8 bits, but have to be careful...
3257         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
3258           origWidth = Lod->getMemoryVT().getSizeInBits();
3259         const APInt &Mask = N0.getConstantOperandAPInt(1);
3260         for (unsigned width = origWidth / 2; width>=8; width /= 2) {
3261           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
3262           for (unsigned offset=0; offset<origWidth/width; offset++) {
3263             if (Mask.isSubsetOf(newMask)) {
3264               if (Layout.isLittleEndian())
3265                 bestOffset = (uint64_t)offset * (width/8);
3266               else
3267                 bestOffset = (origWidth/width - offset - 1) * (width/8);
3268               bestMask = Mask.lshr(offset * (width/8) * 8);
3269               bestWidth = width;
3270               break;
3271             }
3272             newMask <<= width;
3273           }
3274         }
3275       }
3276       if (bestWidth) {
3277         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
3278         if (newVT.isRound() &&
3279             shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
3280           EVT PtrType = Lod->getOperand(1).getValueType();
3281           SDValue Ptr = Lod->getBasePtr();
3282           if (bestOffset != 0)
3283             Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
3284                               DAG.getConstant(bestOffset, dl, PtrType));
3285           unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
3286           SDValue NewLoad = DAG.getLoad(
3287               newVT, dl, Lod->getChain(), Ptr,
3288               Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
3289           return DAG.getSetCC(dl, VT,
3290                               DAG.getNode(ISD::AND, dl, newVT, NewLoad,
3291                                       DAG.getConstant(bestMask.trunc(bestWidth),
3292                                                       dl, newVT)),
3293                               DAG.getConstant(0LL, dl, newVT), Cond);
3294         }
3295       }
3296     }
3297 
3298     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
3299     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
3300       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
3301 
3302       // If the comparison constant has bits in the upper part, the
3303       // zero-extended value could never match.
3304       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
3305                                               C1.getBitWidth() - InSize))) {
3306         switch (Cond) {
3307         case ISD::SETUGT:
3308         case ISD::SETUGE:
3309         case ISD::SETEQ:
3310           return DAG.getConstant(0, dl, VT);
3311         case ISD::SETULT:
3312         case ISD::SETULE:
3313         case ISD::SETNE:
3314           return DAG.getConstant(1, dl, VT);
3315         case ISD::SETGT:
3316         case ISD::SETGE:
3317           // True if the sign bit of C1 is set.
3318           return DAG.getConstant(C1.isNegative(), dl, VT);
3319         case ISD::SETLT:
3320         case ISD::SETLE:
3321           // True if the sign bit of C1 isn't set.
3322           return DAG.getConstant(C1.isNonNegative(), dl, VT);
3323         default:
3324           break;
3325         }
3326       }
3327 
3328       // Otherwise, we can perform the comparison with the low bits.
3329       switch (Cond) {
3330       case ISD::SETEQ:
3331       case ISD::SETNE:
3332       case ISD::SETUGT:
3333       case ISD::SETUGE:
3334       case ISD::SETULT:
3335       case ISD::SETULE: {
3336         EVT newVT = N0.getOperand(0).getValueType();
3337         if (DCI.isBeforeLegalizeOps() ||
3338             (isOperationLegal(ISD::SETCC, newVT) &&
3339              isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
3340           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
3341           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
3342 
3343           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
3344                                           NewConst, Cond);
3345           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
3346         }
3347         break;
3348       }
3349       default:
3350         break; // todo, be more careful with signed comparisons
3351       }
3352     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3353                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3354       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
3355       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
3356       EVT ExtDstTy = N0.getValueType();
3357       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
3358 
3359       // If the constant doesn't fit into the number of bits for the source of
3360       // the sign extension, it is impossible for both sides to be equal.
3361       if (C1.getMinSignedBits() > ExtSrcTyBits)
3362         return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
3363 
3364       SDValue ZextOp;
3365       EVT Op0Ty = N0.getOperand(0).getValueType();
3366       if (Op0Ty == ExtSrcTy) {
3367         ZextOp = N0.getOperand(0);
3368       } else {
3369         APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
3370         ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
3371                              DAG.getConstant(Imm, dl, Op0Ty));
3372       }
3373       if (!DCI.isCalledByLegalizer())
3374         DCI.AddToWorklist(ZextOp.getNode());
3375       // Otherwise, make this a use of a zext.
3376       return DAG.getSetCC(dl, VT, ZextOp,
3377                           DAG.getConstant(C1 & APInt::getLowBitsSet(
3378                                                               ExtDstTyBits,
3379                                                               ExtSrcTyBits),
3380                                           dl, ExtDstTy),
3381                           Cond);
3382     } else if ((N1C->isNullValue() || N1C->isOne()) &&
3383                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3384       // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
3385       if (N0.getOpcode() == ISD::SETCC &&
3386           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
3387           (N0.getValueType() == MVT::i1 ||
3388            getBooleanContents(N0.getOperand(0).getValueType()) ==
3389                        ZeroOrOneBooleanContent)) {
3390         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
3391         if (TrueWhenTrue)
3392           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
3393         // Invert the condition.
3394         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
3395         CC = ISD::getSetCCInverse(CC,
3396                                   N0.getOperand(0).getValueType().isInteger());
3397         if (DCI.isBeforeLegalizeOps() ||
3398             isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
3399           return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
3400       }
3401 
3402       if ((N0.getOpcode() == ISD::XOR ||
3403            (N0.getOpcode() == ISD::AND &&
3404             N0.getOperand(0).getOpcode() == ISD::XOR &&
3405             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
3406           isa<ConstantSDNode>(N0.getOperand(1)) &&
3407           cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
3408         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
3409         // can only do this if the top bits are known zero.
3410         unsigned BitWidth = N0.getValueSizeInBits();
3411         if (DAG.MaskedValueIsZero(N0,
3412                                   APInt::getHighBitsSet(BitWidth,
3413                                                         BitWidth-1))) {
3414           // Okay, get the un-inverted input value.
3415           SDValue Val;
3416           if (N0.getOpcode() == ISD::XOR) {
3417             Val = N0.getOperand(0);
3418           } else {
3419             assert(N0.getOpcode() == ISD::AND &&
3420                     N0.getOperand(0).getOpcode() == ISD::XOR);
3421             // ((X^1)&1)^1 -> X & 1
3422             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
3423                               N0.getOperand(0).getOperand(0),
3424                               N0.getOperand(1));
3425           }
3426 
3427           return DAG.getSetCC(dl, VT, Val, N1,
3428                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3429         }
3430       } else if (N1C->isOne()) {
3431         SDValue Op0 = N0;
3432         if (Op0.getOpcode() == ISD::TRUNCATE)
3433           Op0 = Op0.getOperand(0);
3434 
3435         if ((Op0.getOpcode() == ISD::XOR) &&
3436             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
3437             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
3438           SDValue XorLHS = Op0.getOperand(0);
3439           SDValue XorRHS = Op0.getOperand(1);
3440           // Ensure that the input setccs return an i1 type or 0/1 value.
3441           if (Op0.getValueType() == MVT::i1 ||
3442               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
3443                       ZeroOrOneBooleanContent &&
3444                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
3445                         ZeroOrOneBooleanContent)) {
3446             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
3447             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
3448             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
3449           }
3450         }
3451         if (Op0.getOpcode() == ISD::AND &&
3452             isa<ConstantSDNode>(Op0.getOperand(1)) &&
3453             cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
3454           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
3455           if (Op0.getValueType().bitsGT(VT))
3456             Op0 = DAG.getNode(ISD::AND, dl, VT,
3457                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
3458                           DAG.getConstant(1, dl, VT));
3459           else if (Op0.getValueType().bitsLT(VT))
3460             Op0 = DAG.getNode(ISD::AND, dl, VT,
3461                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
3462                         DAG.getConstant(1, dl, VT));
3463 
3464           return DAG.getSetCC(dl, VT, Op0,
3465                               DAG.getConstant(0, dl, Op0.getValueType()),
3466                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3467         }
3468         if (Op0.getOpcode() == ISD::AssertZext &&
3469             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
3470           return DAG.getSetCC(dl, VT, Op0,
3471                               DAG.getConstant(0, dl, Op0.getValueType()),
3472                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3473       }
3474     }
3475 
3476     // Given:
3477     //   icmp eq/ne (urem %x, %y), 0
3478     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
3479     //   icmp eq/ne %x, 0
3480     if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
3481         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3482       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
3483       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
3484       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
3485         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
3486     }
3487 
3488     if (SDValue V =
3489             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
3490       return V;
3491   }
3492 
3493   // These simplifications apply to splat vectors as well.
3494   // TODO: Handle more splat vector cases.
3495   if (auto *N1C = isConstOrConstSplat(N1)) {
3496     const APInt &C1 = N1C->getAPIntValue();
3497 
3498     APInt MinVal, MaxVal;
3499     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
3500     if (ISD::isSignedIntSetCC(Cond)) {
3501       MinVal = APInt::getSignedMinValue(OperandBitSize);
3502       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
3503     } else {
3504       MinVal = APInt::getMinValue(OperandBitSize);
3505       MaxVal = APInt::getMaxValue(OperandBitSize);
3506     }
3507 
3508     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
3509     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
3510       // X >= MIN --> true
3511       if (C1 == MinVal)
3512         return DAG.getBoolConstant(true, dl, VT, OpVT);
3513 
3514       if (!VT.isVector()) { // TODO: Support this for vectors.
3515         // X >= C0 --> X > (C0 - 1)
3516         APInt C = C1 - 1;
3517         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
3518         if ((DCI.isBeforeLegalizeOps() ||
3519              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3520             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3521                                   isLegalICmpImmediate(C.getSExtValue())))) {
3522           return DAG.getSetCC(dl, VT, N0,
3523                               DAG.getConstant(C, dl, N1.getValueType()),
3524                               NewCC);
3525         }
3526       }
3527     }
3528 
3529     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
3530       // X <= MAX --> true
3531       if (C1 == MaxVal)
3532         return DAG.getBoolConstant(true, dl, VT, OpVT);
3533 
3534       // X <= C0 --> X < (C0 + 1)
3535       if (!VT.isVector()) { // TODO: Support this for vectors.
3536         APInt C = C1 + 1;
3537         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
3538         if ((DCI.isBeforeLegalizeOps() ||
3539              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3540             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3541                                   isLegalICmpImmediate(C.getSExtValue())))) {
3542           return DAG.getSetCC(dl, VT, N0,
3543                               DAG.getConstant(C, dl, N1.getValueType()),
3544                               NewCC);
3545         }
3546       }
3547     }
3548 
3549     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
3550       if (C1 == MinVal)
3551         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
3552 
3553       // TODO: Support this for vectors after legalize ops.
3554       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3555         // Canonicalize setlt X, Max --> setne X, Max
3556         if (C1 == MaxVal)
3557           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3558 
3559         // If we have setult X, 1, turn it into seteq X, 0
3560         if (C1 == MinVal+1)
3561           return DAG.getSetCC(dl, VT, N0,
3562                               DAG.getConstant(MinVal, dl, N0.getValueType()),
3563                               ISD::SETEQ);
3564       }
3565     }
3566 
3567     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
3568       if (C1 == MaxVal)
3569         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
3570 
3571       // TODO: Support this for vectors after legalize ops.
3572       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3573         // Canonicalize setgt X, Min --> setne X, Min
3574         if (C1 == MinVal)
3575           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3576 
3577         // If we have setugt X, Max-1, turn it into seteq X, Max
3578         if (C1 == MaxVal-1)
3579           return DAG.getSetCC(dl, VT, N0,
3580                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
3581                               ISD::SETEQ);
3582       }
3583     }
3584 
3585     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
3586       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
3587       if (C1.isNullValue())
3588         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
3589                 VT, N0, N1, Cond, DCI, dl))
3590           return CC;
3591     }
3592 
3593     // If we have "setcc X, C0", check to see if we can shrink the immediate
3594     // by changing cc.
3595     // TODO: Support this for vectors after legalize ops.
3596     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3597       // SETUGT X, SINTMAX  -> SETLT X, 0
3598       if (Cond == ISD::SETUGT &&
3599           C1 == APInt::getSignedMaxValue(OperandBitSize))
3600         return DAG.getSetCC(dl, VT, N0,
3601                             DAG.getConstant(0, dl, N1.getValueType()),
3602                             ISD::SETLT);
3603 
3604       // SETULT X, SINTMIN  -> SETGT X, -1
3605       if (Cond == ISD::SETULT &&
3606           C1 == APInt::getSignedMinValue(OperandBitSize)) {
3607         SDValue ConstMinusOne =
3608             DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
3609                             N1.getValueType());
3610         return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
3611       }
3612     }
3613   }
3614 
3615   // Back to non-vector simplifications.
3616   // TODO: Can we do these for vector splats?
3617   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3618     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3619     const APInt &C1 = N1C->getAPIntValue();
3620     EVT ShValTy = N0.getValueType();
3621 
3622     // Fold bit comparisons when we can.
3623     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3624         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
3625         N0.getOpcode() == ISD::AND) {
3626       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3627         EVT ShiftTy =
3628             getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
3629         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
3630           // Perform the xform if the AND RHS is a single bit.
3631           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
3632           if (AndRHS->getAPIntValue().isPowerOf2() &&
3633               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
3634             return DAG.getNode(ISD::TRUNCATE, dl, VT,
3635                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3636                                            DAG.getConstant(ShCt, dl, ShiftTy)));
3637           }
3638         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
3639           // (X & 8) == 8  -->  (X & 8) >> 3
3640           // Perform the xform if C1 is a single bit.
3641           unsigned ShCt = C1.logBase2();
3642           if (C1.isPowerOf2() &&
3643               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
3644             return DAG.getNode(ISD::TRUNCATE, dl, VT,
3645                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3646                                            DAG.getConstant(ShCt, dl, ShiftTy)));
3647           }
3648         }
3649       }
3650     }
3651 
3652     if (C1.getMinSignedBits() <= 64 &&
3653         !isLegalICmpImmediate(C1.getSExtValue())) {
3654       EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
3655       // (X & -256) == 256 -> (X >> 8) == 1
3656       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3657           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
3658         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3659           const APInt &AndRHSC = AndRHS->getAPIntValue();
3660           if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
3661             unsigned ShiftBits = AndRHSC.countTrailingZeros();
3662             if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
3663               SDValue Shift =
3664                 DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
3665                             DAG.getConstant(ShiftBits, dl, ShiftTy));
3666               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
3667               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
3668             }
3669           }
3670         }
3671       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
3672                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
3673         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
3674         // X <  0x100000000 -> (X >> 32) <  1
3675         // X >= 0x100000000 -> (X >> 32) >= 1
3676         // X <= 0x0ffffffff -> (X >> 32) <  1
3677         // X >  0x0ffffffff -> (X >> 32) >= 1
3678         unsigned ShiftBits;
3679         APInt NewC = C1;
3680         ISD::CondCode NewCond = Cond;
3681         if (AdjOne) {
3682           ShiftBits = C1.countTrailingOnes();
3683           NewC = NewC + 1;
3684           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3685         } else {
3686           ShiftBits = C1.countTrailingZeros();
3687         }
3688         NewC.lshrInPlace(ShiftBits);
3689         if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
3690             isLegalICmpImmediate(NewC.getSExtValue()) &&
3691             !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
3692           SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3693                                       DAG.getConstant(ShiftBits, dl, ShiftTy));
3694           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
3695           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
3696         }
3697       }
3698     }
3699   }
3700 
3701   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
3702     auto *CFP = cast<ConstantFPSDNode>(N1);
3703     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
3704 
3705     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
3706     // constant if knowing that the operand is non-nan is enough.  We prefer to
3707     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
3708     // materialize 0.0.
3709     if (Cond == ISD::SETO || Cond == ISD::SETUO)
3710       return DAG.getSetCC(dl, VT, N0, N0, Cond);
3711 
3712     // setcc (fneg x), C -> setcc swap(pred) x, -C
3713     if (N0.getOpcode() == ISD::FNEG) {
3714       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
3715       if (DCI.isBeforeLegalizeOps() ||
3716           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
3717         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
3718         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
3719       }
3720     }
3721 
3722     // If the condition is not legal, see if we can find an equivalent one
3723     // which is legal.
3724     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
3725       // If the comparison was an awkward floating-point == or != and one of
3726       // the comparison operands is infinity or negative infinity, convert the
3727       // condition to a less-awkward <= or >=.
3728       if (CFP->getValueAPF().isInfinity()) {
3729         if (CFP->getValueAPF().isNegative()) {
3730           if (Cond == ISD::SETOEQ &&
3731               isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
3732             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
3733           if (Cond == ISD::SETUEQ &&
3734               isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
3735             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
3736           if (Cond == ISD::SETUNE &&
3737               isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
3738             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
3739           if (Cond == ISD::SETONE &&
3740               isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
3741             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
3742         } else {
3743           if (Cond == ISD::SETOEQ &&
3744               isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
3745             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
3746           if (Cond == ISD::SETUEQ &&
3747               isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
3748             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
3749           if (Cond == ISD::SETUNE &&
3750               isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
3751             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
3752           if (Cond == ISD::SETONE &&
3753               isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
3754             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
3755         }
3756       }
3757     }
3758   }
3759 
3760   if (N0 == N1) {
3761     // The sext(setcc()) => setcc() optimization relies on the appropriate
3762     // constant being emitted.
3763     assert(!N0.getValueType().isInteger() &&
3764            "Integer types should be handled by FoldSetCC");
3765 
3766     bool EqTrue = ISD::isTrueWhenEqual(Cond);
3767     unsigned UOF = ISD::getUnorderedFlavor(Cond);
3768     if (UOF == 2) // FP operators that are undefined on NaNs.
3769       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
3770     if (UOF == unsigned(EqTrue))
3771       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
3772     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
3773     // if it is not already.
3774     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
3775     if (NewCond != Cond &&
3776         (DCI.isBeforeLegalizeOps() ||
3777                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
3778       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
3779   }
3780 
3781   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3782       N0.getValueType().isInteger()) {
3783     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
3784         N0.getOpcode() == ISD::XOR) {
3785       // Simplify (X+Y) == (X+Z) -->  Y == Z
3786       if (N0.getOpcode() == N1.getOpcode()) {
3787         if (N0.getOperand(0) == N1.getOperand(0))
3788           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
3789         if (N0.getOperand(1) == N1.getOperand(1))
3790           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
3791         if (isCommutativeBinOp(N0.getOpcode())) {
3792           // If X op Y == Y op X, try other combinations.
3793           if (N0.getOperand(0) == N1.getOperand(1))
3794             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
3795                                 Cond);
3796           if (N0.getOperand(1) == N1.getOperand(0))
3797             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
3798                                 Cond);
3799         }
3800       }
3801 
3802       // If RHS is a legal immediate value for a compare instruction, we need
3803       // to be careful about increasing register pressure needlessly.
3804       bool LegalRHSImm = false;
3805 
3806       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
3807         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3808           // Turn (X+C1) == C2 --> X == C2-C1
3809           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
3810             return DAG.getSetCC(dl, VT, N0.getOperand(0),
3811                                 DAG.getConstant(RHSC->getAPIntValue()-
3812                                                 LHSR->getAPIntValue(),
3813                                 dl, N0.getValueType()), Cond);
3814           }
3815 
3816           // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
3817           if (N0.getOpcode() == ISD::XOR)
3818             // If we know that all of the inverted bits are zero, don't bother
3819             // performing the inversion.
3820             if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
3821               return
3822                 DAG.getSetCC(dl, VT, N0.getOperand(0),
3823                              DAG.getConstant(LHSR->getAPIntValue() ^
3824                                                RHSC->getAPIntValue(),
3825                                              dl, N0.getValueType()),
3826                              Cond);
3827         }
3828 
3829         // Turn (C1-X) == C2 --> X == C1-C2
3830         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
3831           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
3832             return
3833               DAG.getSetCC(dl, VT, N0.getOperand(1),
3834                            DAG.getConstant(SUBC->getAPIntValue() -
3835                                              RHSC->getAPIntValue(),
3836                                            dl, N0.getValueType()),
3837                            Cond);
3838           }
3839         }
3840 
3841         // Could RHSC fold directly into a compare?
3842         if (RHSC->getValueType(0).getSizeInBits() <= 64)
3843           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
3844       }
3845 
3846       // (X+Y) == X --> Y == 0 and similar folds.
3847       // Don't do this if X is an immediate that can fold into a cmp
3848       // instruction and X+Y has other uses. It could be an induction variable
3849       // chain, and the transform would increase register pressure.
3850       if (!LegalRHSImm || N0.hasOneUse())
3851         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
3852           return V;
3853     }
3854 
3855     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
3856         N1.getOpcode() == ISD::XOR)
3857       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
3858         return V;
3859 
3860     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
3861       return V;
3862   }
3863 
3864   // Fold remainder of division by a constant.
3865   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
3866       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3867     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3868 
3869     // When division is cheap or optimizing for minimum size,
3870     // fall through to DIVREM creation by skipping this fold.
3871     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
3872       if (N0.getOpcode() == ISD::UREM) {
3873         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
3874           return Folded;
3875       } else if (N0.getOpcode() == ISD::SREM) {
3876         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
3877           return Folded;
3878       }
3879     }
3880   }
3881 
3882   // Fold away ALL boolean setcc's.
3883   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
3884     SDValue Temp;
3885     switch (Cond) {
3886     default: llvm_unreachable("Unknown integer setcc!");
3887     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
3888       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
3889       N0 = DAG.getNOT(dl, Temp, OpVT);
3890       if (!DCI.isCalledByLegalizer())
3891         DCI.AddToWorklist(Temp.getNode());
3892       break;
3893     case ISD::SETNE:  // X != Y   -->  (X^Y)
3894       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
3895       break;
3896     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
3897     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
3898       Temp = DAG.getNOT(dl, N0, OpVT);
3899       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
3900       if (!DCI.isCalledByLegalizer())
3901         DCI.AddToWorklist(Temp.getNode());
3902       break;
3903     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
3904     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
3905       Temp = DAG.getNOT(dl, N1, OpVT);
3906       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
3907       if (!DCI.isCalledByLegalizer())
3908         DCI.AddToWorklist(Temp.getNode());
3909       break;
3910     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
3911     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
3912       Temp = DAG.getNOT(dl, N0, OpVT);
3913       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
3914       if (!DCI.isCalledByLegalizer())
3915         DCI.AddToWorklist(Temp.getNode());
3916       break;
3917     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
3918     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
3919       Temp = DAG.getNOT(dl, N1, OpVT);
3920       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
3921       break;
3922     }
3923     if (VT.getScalarType() != MVT::i1) {
3924       if (!DCI.isCalledByLegalizer())
3925         DCI.AddToWorklist(N0.getNode());
3926       // FIXME: If running after legalize, we probably can't do this.
3927       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
3928       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
3929     }
3930     return N0;
3931   }
3932 
3933   // Could not fold it.
3934   return SDValue();
3935 }
3936 
3937 /// Returns true (and the GlobalValue and the offset) if the node is a
3938 /// GlobalAddress + offset.
3939 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
3940                                     int64_t &Offset) const {
3941 
3942   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
3943 
3944   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
3945     GA = GASD->getGlobal();
3946     Offset += GASD->getOffset();
3947     return true;
3948   }
3949 
3950   if (N->getOpcode() == ISD::ADD) {
3951     SDValue N1 = N->getOperand(0);
3952     SDValue N2 = N->getOperand(1);
3953     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
3954       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
3955         Offset += V->getSExtValue();
3956         return true;
3957       }
3958     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
3959       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
3960         Offset += V->getSExtValue();
3961         return true;
3962       }
3963     }
3964   }
3965 
3966   return false;
3967 }
3968 
3969 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
3970                                           DAGCombinerInfo &DCI) const {
3971   // Default implementation: no optimization.
3972   return SDValue();
3973 }
3974 
3975 //===----------------------------------------------------------------------===//
3976 //  Inline Assembler Implementation Methods
3977 //===----------------------------------------------------------------------===//
3978 
3979 TargetLowering::ConstraintType
3980 TargetLowering::getConstraintType(StringRef Constraint) const {
3981   unsigned S = Constraint.size();
3982 
3983   if (S == 1) {
3984     switch (Constraint[0]) {
3985     default: break;
3986     case 'r':
3987       return C_RegisterClass;
3988     case 'm': // memory
3989     case 'o': // offsetable
3990     case 'V': // not offsetable
3991       return C_Memory;
3992     case 'n': // Simple Integer
3993     case 'E': // Floating Point Constant
3994     case 'F': // Floating Point Constant
3995       return C_Immediate;
3996     case 'i': // Simple Integer or Relocatable Constant
3997     case 's': // Relocatable Constant
3998     case 'p': // Address.
3999     case 'X': // Allow ANY value.
4000     case 'I': // Target registers.
4001     case 'J':
4002     case 'K':
4003     case 'L':
4004     case 'M':
4005     case 'N':
4006     case 'O':
4007     case 'P':
4008     case '<':
4009     case '>':
4010       return C_Other;
4011     }
4012   }
4013 
4014   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
4015     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
4016       return C_Memory;
4017     return C_Register;
4018   }
4019   return C_Unknown;
4020 }
4021 
4022 /// Try to replace an X constraint, which matches anything, with another that
4023 /// has more specific requirements based on the type of the corresponding
4024 /// operand.
4025 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4026   if (ConstraintVT.isInteger())
4027     return "r";
4028   if (ConstraintVT.isFloatingPoint())
4029     return "f"; // works for many targets
4030   return nullptr;
4031 }
4032 
4033 SDValue TargetLowering::LowerAsmOutputForConstraint(
4034     SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
4035     SelectionDAG &DAG) const {
4036   return SDValue();
4037 }
4038 
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled here; longer constraint
  // strings are target-specific and handled by overrides of this hook.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':     // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock ||
        Op.getOpcode() == ISD::TargetBlockAddress) {
      Ops.push_back(Op);
      return;
    }
    LLVM_FALLTHROUGH;
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    // Outputs of the pattern match below; Offset accumulates the constant
    // parts folded while walking through ADD/SUB nodes.
    GlobalAddressSDNode *GA;
    ConstantSDNode *C;
    BlockAddressSDNode *BA;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (1) {
      // A global address satisfies 'i'/'s'/'X' but not 'n' (simple integer).
      if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0),
                                                 Offset + GA->getOffset()));
        return;
      } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
                 ConstraintLetter != 's') {
        // A plain constant satisfies 'i'/'n'/'X' but not 's' (relocatable).
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
                                      : ISD::SIGN_EXTEND;
        int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
                                                    : C->getSExtValue();
        Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
                                            SDLoc(C), MVT::i64));
        return;
      } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
                 ConstraintLetter != 'n') {
        // A block address is relocatable, so it satisfies everything but 'n'.
        Ops.push_back(DAG.getTargetBlockAddress(
            BA->getBlockAddress(), BA->getValueType(0),
            Offset + BA->getOffset(), BA->getTargetFlags()));
        return;
      } else {
        // Not a leaf we can emit: try to peel one ADD/SUB with a constant
        // operand, fold the constant into Offset, and keep walking.
        const unsigned OpCode = Op.getOpcode();
        if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
          if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
            Op = Op.getOperand(1);
          // Subtraction is not commutative.
          else if (OpCode == ISD::ADD &&
                   (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
            Op = Op.getOperand(0);
          else
            return;
          // (C - X) negates the folded constant; (X + C) adds it.
          Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
          continue;
        }
      }
      // No match and nothing left to peel: leave Ops empty (invalid operand).
      return;
    }
    break;
  }
  }
}
4119 
4120 std::pair<unsigned, const TargetRegisterClass *>
4121 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
4122                                              StringRef Constraint,
4123                                              MVT VT) const {
4124   if (Constraint.empty() || Constraint[0] != '{')
4125     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
4126   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
4127 
4128   // Remove the braces from around the name.
4129   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
4130 
4131   std::pair<unsigned, const TargetRegisterClass *> R =
4132       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
4133 
4134   // Figure out which register class contains this reg.
4135   for (const TargetRegisterClass *RC : RI->regclasses()) {
4136     // If none of the value types for this register class are valid, we
4137     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
4138     if (!isLegalRC(*RI, *RC))
4139       continue;
4140 
4141     for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
4142          I != E; ++I) {
4143       if (RegName.equals_lower(RI->getRegAsmName(*I))) {
4144         std::pair<unsigned, const TargetRegisterClass *> S =
4145             std::make_pair(*I, RC);
4146 
4147         // If this register class has the requested value type, return it,
4148         // otherwise keep searching and return the first class found
4149         // if no other is found which explicitly has the requested type.
4150         if (RI->isTypeLegalForClass(*RC, VT))
4151           return S;
4152         if (!R.second)
4153           R = S;
4154       }
4155     }
4156   }
4157 
4158   return R;
4159 }
4160 
4161 //===----------------------------------------------------------------------===//
4162 // Constraint Selection.
4163 
4164 /// Return true of this is an input operand that is a matching constraint like
4165 /// "4".
4166 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
4167   assert(!ConstraintCode.empty() && "No known constraint!");
4168   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
4169 }
4170 
4171 /// If this is an input matching constraint, this method returns the output
4172 /// operand it matches.
4173 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
4174   assert(!ConstraintCode.empty() && "No known constraint!");
4175   return atoi(ConstraintCode.c_str());
4176 }
4177 
4178 /// Split up the constraint string from the inline assembly value into the
4179 /// specific constraints and their prefixes, and also tie in the associated
4180 /// operand values.
4181 /// If this returns an empty vector, and if the constraint string itself
4182 /// isn't empty, there was an error parsing.
4183 TargetLowering::AsmOperandInfoVector
4184 TargetLowering::ParseConstraints(const DataLayout &DL,
4185                                  const TargetRegisterInfo *TRI,
4186                                  ImmutableCallSite CS) const {
4187   /// Information about all of the constraints.
4188   AsmOperandInfoVector ConstraintOperands;
4189   const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
4190   unsigned maCount = 0; // Largest number of multiple alternative constraints.
4191 
4192   // Do a prepass over the constraints, canonicalizing them, and building up the
4193   // ConstraintOperands list.
4194   unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
4195   unsigned ResNo = 0; // ResNo - The result number of the next output.
4196 
4197   for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
4198     ConstraintOperands.emplace_back(std::move(CI));
4199     AsmOperandInfo &OpInfo = ConstraintOperands.back();
4200 
4201     // Update multiple alternative constraint count.
4202     if (OpInfo.multipleAlternatives.size() > maCount)
4203       maCount = OpInfo.multipleAlternatives.size();
4204 
4205     OpInfo.ConstraintVT = MVT::Other;
4206 
4207     // Compute the value type for each operand.
4208     switch (OpInfo.Type) {
4209     case InlineAsm::isOutput:
4210       // Indirect outputs just consume an argument.
4211       if (OpInfo.isIndirect) {
4212         OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
4213         break;
4214       }
4215 
4216       // The return value of the call is this value.  As such, there is no
4217       // corresponding argument.
4218       assert(!CS.getType()->isVoidTy() &&
4219              "Bad inline asm!");
4220       if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
4221         OpInfo.ConstraintVT =
4222             getSimpleValueType(DL, STy->getElementType(ResNo));
4223       } else {
4224         assert(ResNo == 0 && "Asm only has one result!");
4225         OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
4226       }
4227       ++ResNo;
4228       break;
4229     case InlineAsm::isInput:
4230       OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
4231       break;
4232     case InlineAsm::isClobber:
4233       // Nothing to do.
4234       break;
4235     }
4236 
4237     if (OpInfo.CallOperandVal) {
4238       llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
4239       if (OpInfo.isIndirect) {
4240         llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
4241         if (!PtrTy)
4242           report_fatal_error("Indirect operand for inline asm not a pointer!");
4243         OpTy = PtrTy->getElementType();
4244       }
4245 
4246       // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
4247       if (StructType *STy = dyn_cast<StructType>(OpTy))
4248         if (STy->getNumElements() == 1)
4249           OpTy = STy->getElementType(0);
4250 
4251       // If OpTy is not a single value, it may be a struct/union that we
4252       // can tile with integers.
4253       if (!OpTy->isSingleValueType() && OpTy->isSized()) {
4254         unsigned BitSize = DL.getTypeSizeInBits(OpTy);
4255         switch (BitSize) {
4256         default: break;
4257         case 1:
4258         case 8:
4259         case 16:
4260         case 32:
4261         case 64:
4262         case 128:
4263           OpInfo.ConstraintVT =
4264               MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
4265           break;
4266         }
4267       } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
4268         unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
4269         OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
4270       } else {
4271         OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
4272       }
4273     }
4274   }
4275 
4276   // If we have multiple alternative constraints, select the best alternative.
4277   if (!ConstraintOperands.empty()) {
4278     if (maCount) {
4279       unsigned bestMAIndex = 0;
4280       int bestWeight = -1;
4281       // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
4282       int weight = -1;
4283       unsigned maIndex;
4284       // Compute the sums of the weights for each alternative, keeping track
4285       // of the best (highest weight) one so far.
4286       for (maIndex = 0; maIndex < maCount; ++maIndex) {
4287         int weightSum = 0;
4288         for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4289              cIndex != eIndex; ++cIndex) {
4290           AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
4291           if (OpInfo.Type == InlineAsm::isClobber)
4292             continue;
4293 
4294           // If this is an output operand with a matching input operand,
4295           // look up the matching input. If their types mismatch, e.g. one
4296           // is an integer, the other is floating point, or their sizes are
4297           // different, flag it as an maCantMatch.
4298           if (OpInfo.hasMatchingInput()) {
4299             AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4300             if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4301               if ((OpInfo.ConstraintVT.isInteger() !=
4302                    Input.ConstraintVT.isInteger()) ||
4303                   (OpInfo.ConstraintVT.getSizeInBits() !=
4304                    Input.ConstraintVT.getSizeInBits())) {
4305                 weightSum = -1; // Can't match.
4306                 break;
4307               }
4308             }
4309           }
4310           weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
4311           if (weight == -1) {
4312             weightSum = -1;
4313             break;
4314           }
4315           weightSum += weight;
4316         }
4317         // Update best.
4318         if (weightSum > bestWeight) {
4319           bestWeight = weightSum;
4320           bestMAIndex = maIndex;
4321         }
4322       }
4323 
4324       // Now select chosen alternative in each constraint.
4325       for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4326            cIndex != eIndex; ++cIndex) {
4327         AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
4328         if (cInfo.Type == InlineAsm::isClobber)
4329           continue;
4330         cInfo.selectAlternative(bestMAIndex);
4331       }
4332     }
4333   }
4334 
4335   // Check and hook up tied operands, choose constraint code to use.
4336   for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4337        cIndex != eIndex; ++cIndex) {
4338     AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
4339 
4340     // If this is an output operand with a matching input operand, look up the
4341     // matching input. If their types mismatch, e.g. one is an integer, the
4342     // other is floating point, or their sizes are different, flag it as an
4343     // error.
4344     if (OpInfo.hasMatchingInput()) {
4345       AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4346 
4347       if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4348         std::pair<unsigned, const TargetRegisterClass *> MatchRC =
4349             getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
4350                                          OpInfo.ConstraintVT);
4351         std::pair<unsigned, const TargetRegisterClass *> InputRC =
4352             getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
4353                                          Input.ConstraintVT);
4354         if ((OpInfo.ConstraintVT.isInteger() !=
4355              Input.ConstraintVT.isInteger()) ||
4356             (MatchRC.second != InputRC.second)) {
4357           report_fatal_error("Unsupported asm: input constraint"
4358                              " with a matching output constraint of"
4359                              " incompatible type!");
4360         }
4361       }
4362     }
4363   }
4364 
4365   return ConstraintOperands;
4366 }
4367 
4368 /// Return an integer indicating how general CT is.
4369 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
4370   switch (CT) {
4371   case TargetLowering::C_Immediate:
4372   case TargetLowering::C_Other:
4373   case TargetLowering::C_Unknown:
4374     return 0;
4375   case TargetLowering::C_Register:
4376     return 1;
4377   case TargetLowering::C_RegisterClass:
4378     return 2;
4379   case TargetLowering::C_Memory:
4380     return 3;
4381   }
4382   llvm_unreachable("Invalid constraint type");
4383 }
4384 
4385 /// Examine constraint type and operand type and determine a weight value.
4386 /// This object must already have been set up with the operand type
4387 /// and the current alternative constraint selected.
4388 TargetLowering::ConstraintWeight
4389   TargetLowering::getMultipleConstraintMatchWeight(
4390     AsmOperandInfo &info, int maIndex) const {
4391   InlineAsm::ConstraintCodeVector *rCodes;
4392   if (maIndex >= (int)info.multipleAlternatives.size())
4393     rCodes = &info.Codes;
4394   else
4395     rCodes = &info.multipleAlternatives[maIndex].Codes;
4396   ConstraintWeight BestWeight = CW_Invalid;
4397 
4398   // Loop over the options, keeping track of the most general one.
4399   for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
4400     ConstraintWeight weight =
4401       getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
4402     if (weight > BestWeight)
4403       BestWeight = weight;
4404   }
4405 
4406   return BestWeight;
4407 }
4408 
4409 /// Examine constraint type and operand type and determine a weight value.
4410 /// This object must already have been set up with the operand type
4411 /// and the current alternative constraint selected.
4412 TargetLowering::ConstraintWeight
4413   TargetLowering::getSingleConstraintMatchWeight(
4414     AsmOperandInfo &info, const char *constraint) const {
4415   ConstraintWeight weight = CW_Invalid;
4416   Value *CallOperandVal = info.CallOperandVal;
4417     // If we don't have a value, we can't do a match,
4418     // but allow it at the lowest weight.
4419   if (!CallOperandVal)
4420     return CW_Default;
4421   // Look at the constraint type.
4422   switch (*constraint) {
4423     case 'i': // immediate integer.
4424     case 'n': // immediate integer with a known value.
4425       if (isa<ConstantInt>(CallOperandVal))
4426         weight = CW_Constant;
4427       break;
4428     case 's': // non-explicit intregal immediate.
4429       if (isa<GlobalValue>(CallOperandVal))
4430         weight = CW_Constant;
4431       break;
4432     case 'E': // immediate float if host format.
4433     case 'F': // immediate float.
4434       if (isa<ConstantFP>(CallOperandVal))
4435         weight = CW_Constant;
4436       break;
4437     case '<': // memory operand with autodecrement.
4438     case '>': // memory operand with autoincrement.
4439     case 'm': // memory operand.
4440     case 'o': // offsettable memory operand
4441     case 'V': // non-offsettable memory operand
4442       weight = CW_Memory;
4443       break;
4444     case 'r': // general register.
4445     case 'g': // general register, memory operand or immediate integer.
4446               // note: Clang converts "g" to "imr".
4447       if (CallOperandVal->getType()->isIntegerTy())
4448         weight = CW_Register;
4449       break;
4450     case 'X': // any operand.
4451   default:
4452     weight = CW_Default;
4453     break;
4454   }
4455   return weight;
4456 }
4457 
4458 /// If there are multiple different constraints that we could pick for this
4459 /// operand (e.g. "imr") try to pick the 'best' one.
4460 /// This is somewhat tricky: constraints fall into four classes:
4461 ///    Other         -> immediates and magic values
4462 ///    Register      -> one specific register
4463 ///    RegisterClass -> a group of regs
4464 ///    Memory        -> memory
4465 /// Ideally, we would pick the most specific constraint possible: if we have
4466 /// something that fits into a register, we would pick it.  The problem here
4467 /// is that if we have something that could either be in a register or in
4468 /// memory that use of the register could cause selection of *other*
4469 /// operands to fail: they might only succeed if we pick memory.  Because of
4470 /// this the heuristic we use is:
4471 ///
4472 ///  1) If there is an 'other' constraint, and if the operand is valid for
4473 ///     that constraint, use it.  This makes us take advantage of 'i'
4474 ///     constraints when available.
4475 ///  2) Otherwise, pick the most general constraint present.  This prefers
4476 ///     'm' over 'r', for example.
4477 ///
4478 static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
4479                              const TargetLowering &TLI,
4480                              SDValue Op, SelectionDAG *DAG) {
4481   assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
4482   unsigned BestIdx = 0;
4483   TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
4484   int BestGenerality = -1;
4485 
4486   // Loop over the options, keeping track of the most general one.
4487   for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
4488     TargetLowering::ConstraintType CType =
4489       TLI.getConstraintType(OpInfo.Codes[i]);
4490 
4491     // If this is an 'other' or 'immediate' constraint, see if the operand is
4492     // valid for it. For example, on X86 we might have an 'rI' constraint. If
4493     // the operand is an integer in the range [0..31] we want to use I (saving a
4494     // load of a register), otherwise we must use 'r'.
4495     if ((CType == TargetLowering::C_Other ||
4496          CType == TargetLowering::C_Immediate) && Op.getNode()) {
4497       assert(OpInfo.Codes[i].size() == 1 &&
4498              "Unhandled multi-letter 'other' constraint");
4499       std::vector<SDValue> ResultOps;
4500       TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
4501                                        ResultOps, *DAG);
4502       if (!ResultOps.empty()) {
4503         BestType = CType;
4504         BestIdx = i;
4505         break;
4506       }
4507     }
4508 
4509     // Things with matching constraints can only be registers, per gcc
4510     // documentation.  This mainly affects "g" constraints.
4511     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
4512       continue;
4513 
4514     // This constraint letter is more general than the previous one, use it.
4515     int Generality = getConstraintGenerality(CType);
4516     if (Generality > BestGenerality) {
4517       BestType = CType;
4518       BestIdx = i;
4519       BestGenerality = Generality;
4520     }
4521   }
4522 
4523   OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
4524   OpInfo.ConstraintType = BestType;
4525 }
4526 
4527 /// Determines the constraint code and constraint type to use for the specific
4528 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
4529 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
4530                                             SDValue Op,
4531                                             SelectionDAG *DAG) const {
4532   assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
4533 
4534   // Single-letter constraints ('r') are very common.
4535   if (OpInfo.Codes.size() == 1) {
4536     OpInfo.ConstraintCode = OpInfo.Codes[0];
4537     OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4538   } else {
4539     ChooseConstraint(OpInfo, *this, Op, DAG);
4540   }
4541 
4542   // 'X' matches anything.
4543   if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
4544     // Labels and constants are handled elsewhere ('X' is the only thing
4545     // that matches labels).  For Functions, the type here is the type of
4546     // the result, which is not what we want to look at; leave them alone.
4547     Value *v = OpInfo.CallOperandVal;
4548     if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
4549       OpInfo.CallOperandVal = v;
4550       return;
4551     }
4552 
4553     if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
4554       return;
4555 
4556     // Otherwise, try to resolve it to something we know about by looking at
4557     // the actual operand type.
4558     if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
4559       OpInfo.ConstraintCode = Repl;
4560       OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4561     }
4562   }
4563 }
4564 
4565 /// Given an exact SDIV by a constant, create a multiplication
4566 /// with the multiplicative inverse of the constant.
4567 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
4568                               const SDLoc &dl, SelectionDAG &DAG,
4569                               SmallVectorImpl<SDNode *> &Created) {
4570   SDValue Op0 = N->getOperand(0);
4571   SDValue Op1 = N->getOperand(1);
4572   EVT VT = N->getValueType(0);
4573   EVT SVT = VT.getScalarType();
4574   EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
4575   EVT ShSVT = ShVT.getScalarType();
4576 
4577   bool UseSRA = false;
4578   SmallVector<SDValue, 16> Shifts, Factors;
4579 
4580   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
4581     if (C->isNullValue())
4582       return false;
4583     APInt Divisor = C->getAPIntValue();
4584     unsigned Shift = Divisor.countTrailingZeros();
4585     if (Shift) {
4586       Divisor.ashrInPlace(Shift);
4587       UseSRA = true;
4588     }
4589     // Calculate the multiplicative inverse, using Newton's method.
4590     APInt t;
4591     APInt Factor = Divisor;
4592     while ((t = Divisor * Factor) != 1)
4593       Factor *= APInt(Divisor.getBitWidth(), 2) - t;
4594     Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
4595     Factors.push_back(DAG.getConstant(Factor, dl, SVT));
4596     return true;
4597   };
4598 
4599   // Collect all magic values from the build vector.
4600   if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
4601     return SDValue();
4602 
4603   SDValue Shift, Factor;
4604   if (VT.isVector()) {
4605     Shift = DAG.getBuildVector(ShVT, dl, Shifts);
4606     Factor = DAG.getBuildVector(VT, dl, Factors);
4607   } else {
4608     Shift = Shifts[0];
4609     Factor = Factors[0];
4610   }
4611 
4612   SDValue Res = Op0;
4613 
4614   // Shift the value upfront if it is even, so the LSB is one.
4615   if (UseSRA) {
4616     // TODO: For UDIV use SRL instead of SRA.
4617     SDNodeFlags Flags;
4618     Flags.setExact(true);
4619     Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
4620     Created.push_back(Res.getNode());
4621   }
4622 
4623   return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
4624 }
4625 
4626 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
4627                               SelectionDAG &DAG,
4628                               SmallVectorImpl<SDNode *> &Created) const {
4629   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4630   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4631   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
4632     return SDValue(N, 0); // Lower SDIV as SDIV
4633   return SDValue();
4634 }
4635 
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  // Per-element expansion parameters: magic multiplier, numerator
  // add/subtract factor (-1/0/+1), arithmetic-shift amount, and a mask that
  // enables/disables the final sign-bit fixup.
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // Compute the expansion parameters for one constant divisor element.
  // Returns false (aborting the whole fold) on a zero divisor.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    // magic() yields multiplier m and shift s per Hacker's Delight.
    APInt::ms magics = Divisor.magic();
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.m = 0;
      magics.s = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Splat-or-scalar: build vectors of the per-lane parameters, or take the
  // single scalar parameter directly.
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (VT.isVector()) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else {
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  // Prefer MULHS; otherwise take the high half of SMUL_LOHI (result 1).
  SDValue Q;
  if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
                          : isOperationLegalOrCustom(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
  else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
                               : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
    SDValue LoHi =
        DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
    Q = SDValue(LoHi.getNode(), 1);
  } else
    return SDValue(); // No mulhs or equivalent.
  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  // Factor is -1/0/+1 per lane, so the MUL selects -N0, 0 or +N0.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  // ShiftMask is 0 for the +1/-1 divisor case (no fixup), -1 otherwise.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
4744 
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // UseNPQ records whether any lane needs the "add indicator" (NPQ) fixup
  // for a magic number that doesn't fit (magics.a != 0).
  bool UseNPQ = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Compute the expansion parameters for one constant divisor element.
  // Returns false (aborting the whole fold) on a zero divisor.
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;
    // FIXME: We should use a narrower constant when the upper
    // bits are known to be zero.
    APInt Divisor = C->getAPIntValue();
    APInt::mu magics = Divisor.magicu();
    unsigned PreShift = 0, PostShift = 0;

    // If the divisor is even, we can avoid using the expensive fixup by
    // shifting the divided value upfront.
    if (magics.a != 0 && !Divisor[0]) {
      PreShift = Divisor.countTrailingZeros();
      // Get magic number for the shifted divisor.
      magics = Divisor.lshr(PreShift).magicu(PreShift);
      assert(magics.a == 0 && "Should use cheap fixup now");
    }

    APInt Magic = magics.m;

    unsigned SelNPQ;
    if (magics.a == 0 || Divisor.isOneValue()) {
      // Magic number fits: quotient is just (mulhu(N, Magic) >> s).
      assert(magics.s < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      PostShift = magics.s;
      SelNPQ = false;
    } else {
      // Magic number overflowed: need the NPQ fixup and one less post-shift.
      PostShift = magics.s - 1;
      SelNPQ = true;
    }

    // NPQFactor is 2^(EltBits-1) for NPQ lanes (MULHU by it acts as a
    // logical shift right by one) and zero otherwise.
    PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
    MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
    NPQFactors.push_back(
        DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
                               : APInt::getNullValue(EltBits),
                        dl, SVT));
    PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
    UseNPQ |= SelNPQ;
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (VT.isVector()) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else {
    // Scalar path never reads NPQFactor: the NPQ branch below uses a plain
    // SRL-by-1 instead, so only the other three parameters are needed.
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
  Created.push_back(Q.getNode());

  // FIXME: We should support doing a MUL in a wider type.
  // Helper: high half of an unsigned multiply, via MULHU or UMUL_LOHI.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
                            : isOperationLegalOrCustom(ISD::MULHU, VT))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
                            : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // NPQ fixup: Q = ((N0 - Q) >> 1) + Q, as in Hacker's Delight, so the
  // effective magic multiply doesn't overflow.
  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
  Created.push_back(Q.getNode());

  // Guard divisor == 1 lanes: select the original numerator for them.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
4877 
4878 /// If all values in Values that *don't* match the predicate are same 'splat'
4879 /// value, then replace all values with that splat value.
4880 /// Else, if AlternativeReplacement was provided, then replace all values that
4881 /// do match predicate with AlternativeReplacement value.
4882 static void
4883 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
4884                           std::function<bool(SDValue)> Predicate,
4885                           SDValue AlternativeReplacement = SDValue()) {
4886   SDValue Replacement;
4887   // Is there a value for which the Predicate does *NOT* match? What is it?
4888   auto SplatValue = llvm::find_if_not(Values, Predicate);
4889   if (SplatValue != Values.end()) {
4890     // Does Values consist only of SplatValue's and values matching Predicate?
4891     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
4892           return Value == *SplatValue || Predicate(Value);
4893         })) // Then we shall replace values matching predicate with SplatValue.
4894       Replacement = *SplatValue;
4895   }
4896   if (!Replacement) {
4897     // Oops, we did not find the "baseline" splat value.
4898     if (!AlternativeReplacement)
4899       return; // Nothing to do.
4900     // Let's replace with provided value then.
4901     Replacement = AlternativeReplacement;
4902   }
4903   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
4904 }
4905 
4906 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
4907 /// where the divisor is constant and the comparison target is zero,
4908 /// return a DAG expression that will generate the same comparison result
4909 /// using only multiplications, additions and shifts/rotations.
4910 /// Ref: "Hacker's Delight" 10-17.
4911 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
4912                                         SDValue CompTargetNode,
4913                                         ISD::CondCode Cond,
4914                                         DAGCombinerInfo &DCI,
4915                                         const SDLoc &DL) const {
4916   SmallVector<SDNode *, 5> Built;
4917   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
4918                                          DCI, DL, Built)) {
4919     for (SDNode *N : Built)
4920       DCI.AddToWorklist(N);
4921     return Folded;
4922   }
4923 
4924   return SDValue();
4925 }
4926 
4927 SDValue
4928 TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
4929                                   SDValue CompTargetNode, ISD::CondCode Cond,
4930                                   DAGCombinerInfo &DCI, const SDLoc &DL,
4931                                   SmallVectorImpl<SDNode *> &Created) const {
4932   // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
4933   // - D must be constant, with D = D0 * 2^K where D0 is odd
4934   // - P is the multiplicative inverse of D0 modulo 2^W
4935   // - Q = floor(((2^W) - 1) / D)
4936   // where W is the width of the common type of N and D.
4937   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4938          "Only applicable for (in)equality comparisons.");
4939 
4940   SelectionDAG &DAG = DCI.DAG;
4941 
4942   EVT VT = REMNode.getValueType();
4943   EVT SVT = VT.getScalarType();
4944   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4945   EVT ShSVT = ShVT.getScalarType();
4946 
4947   // If MUL is unavailable, we cannot proceed in any case.
4948   if (!isOperationLegalOrCustom(ISD::MUL, VT))
4949     return SDValue();
4950 
4951   bool ComparingWithAllZeros = true;
4952   bool AllComparisonsWithNonZerosAreTautological = true;
4953   bool HadTautologicalLanes = false;
4954   bool AllLanesAreTautological = true;
4955   bool HadEvenDivisor = false;
4956   bool AllDivisorsArePowerOfTwo = true;
4957   bool HadTautologicalInvertedLanes = false;
4958   SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
4959 
4960   auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
4961     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
4962     if (CDiv->isNullValue())
4963       return false;
4964 
4965     const APInt &D = CDiv->getAPIntValue();
4966     const APInt &Cmp = CCmp->getAPIntValue();
4967 
4968     ComparingWithAllZeros &= Cmp.isNullValue();
4969 
    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
4971     // if C2 is not less than C1, the comparison is always false.
4972     // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
4974     bool TautologicalInvertedLane = D.ule(Cmp);
4975     HadTautologicalInvertedLanes |= TautologicalInvertedLane;
4976 
4977     // If all lanes are tautological (either all divisors are ones, or divisor
4978     // is not greater than the constant we are comparing with),
4979     // we will prefer to avoid the fold.
4980     bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
4981     HadTautologicalLanes |= TautologicalLane;
4982     AllLanesAreTautological &= TautologicalLane;
4983 
    // If we are comparing with non-zero, we'll need to subtract said
4985     // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
4987     if (!Cmp.isNullValue())
4988       AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
4989 
4990     // Decompose D into D0 * 2^K
4991     unsigned K = D.countTrailingZeros();
4992     assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
4993     APInt D0 = D.lshr(K);
4994 
4995     // D is even if it has trailing zeros.
4996     HadEvenDivisor |= (K != 0);
4997     // D is a power-of-two if D0 is one.
4998     // If all divisors are power-of-two, we will prefer to avoid the fold.
4999     AllDivisorsArePowerOfTwo &= D0.isOneValue();
5000 
5001     // P = inv(D0, 2^W)
5002     // 2^W requires W + 1 bits, so we have to extend and then truncate.
5003     unsigned W = D.getBitWidth();
5004     APInt P = D0.zext(W + 1)
5005                   .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
5006                   .trunc(W);
5007     assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
5008     assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
5009 
5010     // Q = floor((2^W - 1) u/ D)
5011     // R = ((2^W - 1) u% D)
5012     APInt Q, R;
5013     APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
5014 
5015     // If we are comparing with zero, then that comparison constant is okay,
5016     // else it may need to be one less than that.
5017     if (Cmp.ugt(R))
5018       Q -= 1;
5019 
5020     assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
5021            "We are expecting that K is always less than all-ones for ShSVT");
5022 
5023     // If the lane is tautological the result can be constant-folded.
5024     if (TautologicalLane) {
5025       // Set P and K amount to a bogus values so we can try to splat them.
5026       P = 0;
5027       K = -1;
5028       // And ensure that comparison constant is tautological,
5029       // it will always compare true/false.
5030       Q = -1;
5031     }
5032 
5033     PAmts.push_back(DAG.getConstant(P, DL, SVT));
5034     KAmts.push_back(
5035         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
5036     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
5037     return true;
5038   };
5039 
5040   SDValue N = REMNode.getOperand(0);
5041   SDValue D = REMNode.getOperand(1);
5042 
5043   // Collect the values from each element.
5044   if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
5045     return SDValue();
5046 
5047   // If all lanes are tautological, the result can be constant-folded.
5048   if (AllLanesAreTautological)
5049     return SDValue();
5050 
5051   // If this is a urem by a powers-of-two, avoid the fold since it can be
5052   // best implemented as a bit test.
5053   if (AllDivisorsArePowerOfTwo)
5054     return SDValue();
5055 
5056   SDValue PVal, KVal, QVal;
5057   if (VT.isVector()) {
5058     if (HadTautologicalLanes) {
5059       // Try to turn PAmts into a splat, since we don't care about the values
5060       // that are currently '0'. If we can't, just keep '0'`s.
5061       turnVectorIntoSplatVector(PAmts, isNullConstant);
5062       // Try to turn KAmts into a splat, since we don't care about the values
5063       // that are currently '-1'. If we can't, change them to '0'`s.
5064       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
5065                                 DAG.getConstant(0, DL, ShSVT));
5066     }
5067 
5068     PVal = DAG.getBuildVector(VT, DL, PAmts);
5069     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
5070     QVal = DAG.getBuildVector(VT, DL, QAmts);
5071   } else {
5072     PVal = PAmts[0];
5073     KVal = KAmts[0];
5074     QVal = QAmts[0];
5075   }
5076 
5077   if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
5078     if (!isOperationLegalOrCustom(ISD::SUB, VT))
5079       return SDValue(); // FIXME: Could/should use `ISD::ADD`?
5080     assert(CompTargetNode.getValueType() == N.getValueType() &&
5081            "Expecting that the types on LHS and RHS of comparisons match.");
5082     N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
5083   }
5084 
5085   // (mul N, P)
5086   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
5087   Created.push_back(Op0.getNode());
5088 
5089   // Rotate right only if any divisor was even. We avoid rotates for all-odd
5090   // divisors as a performance improvement, since rotating by 0 is a no-op.
5091   if (HadEvenDivisor) {
5092     // We need ROTR to do this.
5093     if (!isOperationLegalOrCustom(ISD::ROTR, VT))
5094       return SDValue();
5095     SDNodeFlags Flags;
5096     Flags.setExact(true);
5097     // UREM: (rotr (mul N, P), K)
5098     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
5099     Created.push_back(Op0.getNode());
5100   }
5101 
5102   // UREM: (setule/setugt (rotr (mul N, P), K), Q)
5103   SDValue NewCC =
5104       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
5105                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
5106   if (!HadTautologicalInvertedLanes)
5107     return NewCC;
5108 
5109   // If any lanes previously compared always-false, the NewCC will give
5110   // always-true result for them, so we need to fixup those lanes.
5111   // Or the other way around for inequality predicate.
5112   assert(VT.isVector() && "Can/should only get here for vectors.");
5113   Created.push_back(NewCC.getNode());
5114 
5115   // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
5116   // if C2 is not less than C1, the comparison is always false.
5117   // But we have produced the comparison that will give the
5118   // opposive tautological answer. So these lanes would need to be fixed up.
5119   SDValue TautologicalInvertedChannels =
5120       DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
5121   Created.push_back(TautologicalInvertedChannels.getNode());
5122 
5123   if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
5124     // If we have a vector select, let's replace the comparison results in the
5125     // affected lanes with the correct tautological result.
5126     SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
5127                                               DL, SETCCVT, SETCCVT);
5128     return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
5129                        Replacement, NewCC);
5130   }
5131 
5132   // Else, we can just invert the comparison result in the appropriate lanes.
5133   if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
5134     return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
5135                        TautologicalInvertedChannels);
5136 
5137   return SDValue(); // Don't know how to lower.
5138 }
5139 
5140 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
5141 /// where the divisor is constant and the comparison target is zero,
5142 /// return a DAG expression that will generate the same comparison result
5143 /// using only multiplications, additions and shifts/rotations.
5144 /// Ref: "Hacker's Delight" 10-17.
5145 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
5146                                         SDValue CompTargetNode,
5147                                         ISD::CondCode Cond,
5148                                         DAGCombinerInfo &DCI,
5149                                         const SDLoc &DL) const {
5150   SmallVector<SDNode *, 7> Built;
5151   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5152                                          DCI, DL, Built)) {
5153     assert(Built.size() <= 7 && "Max size prediction failed.");
5154     for (SDNode *N : Built)
5155       DCI.AddToWorklist(N);
5156     return Folded;
5157   }
5158 
5159   return SDValue();
5160 }
5161 
/// Build the replacement expression for `(setcc (srem N, D), 0, eq/ne)` that
/// avoids division, following "Hacker's Delight" 10-17. Returns an empty
/// SDValue when the fold does not apply (illegal ops, trivial divisors, ...);
/// otherwise returns the new SETCCVT-typed value and appends every node it
/// created to \p Created so the caller can re-queue them for combining.
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isNullValue())
    return SDValue();

  // Per-lane bookkeeping gathered while scanning the divisor elements below.
  bool HadIntMinDivisor = false;       // Any lane with D == INT_MIN (needs fixup).
  bool HadOneDivisor = false;          // Any lane with D == 1 (constant-foldable).
  bool AllDivisorsAreOnes = true;      // All lanes trivial -> skip the fold.
  bool HadEvenDivisor = false;         // Any non-INT_MIN lane needs the rotate.
  bool NeedToApplyOffset = false;      // Any non-INT_MIN lane needs the ADD of A.
  bool AllDivisorsArePowerOfTwo = true; // Better handled as a bit test.
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  // Computes P, A, K and Q for one constant divisor lane; returns false to
  // abort the whole fold.
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isNullValue())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`

    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOneValue();
    AllDivisorsAreOnes &= D.isOneValue();

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOneValue();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOneValue()) {
      // Set P, A and K to a bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, AVal, KVal, QVal;
  if (VT.isVector()) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else {
    // Scalar case: the single collected lane is used directly.
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();

    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    SDNodeFlags Flags;
    // NOTE(review): 'exact' on a rotate mirrors the exact shift this rotate
    // stands in for — confirm downstream consumers expect it here.
    Flags.setExact(true);
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // The fixup below needs equality setcc, AND, the original predicate and
  // VSELECT; bail out if any of them is unavailable for this type.
  if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isOperationLegalOrCustom(Cond, VT) ||
      !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended =
      DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);

  return Blended;
}
5402 
5403 bool TargetLowering::
5404 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
5405   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
5406     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
5407                                 "be a constant integer");
5408     return true;
5409   }
5410 
5411   return false;
5412 }
5413 
/// Return whether \p Op can be negated without materializing a new FNEG:
///   0 - not cheaply negatible;
///   1 - negatible at no extra cost (e.g. by negating a constant or flipping
///       one operand of a sub/mul/div);
///   2 - negation is a strict win (it removes an existing FNEG).
/// Must stay in sync with getNegatedExpression(), which performs the
/// corresponding rewrite (it asserts on mismatch).
char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
                                        bool LegalOperations, bool ForCodeSize,
                                        unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG)
    return 2;

  // Don't allow anything with multiple uses unless we know it is free.
  // (A free FP_EXTEND is the one exception: duplicating it costs nothing.)
  EVT VT = Op.getValueType();
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
                           isFPExtFree(VT, Op.getOperand(0).getValueType())))
    return 0;

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return 0;

  switch (Op.getOpcode()) {
  case ISD::ConstantFP: {
    if (!LegalOperations)
      return 1;

    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    return isOperationLegal(ISD::ConstantFP, VT) ||
           isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                        ForCodeSize);
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      return 0;
    if (!LegalOperations)
      return 1;
    if (isOperationLegal(ISD::ConstantFP, VT) &&
        isOperationLegal(ISD::BUILD_VECTOR, VT))
      return 1;
    // Otherwise every (non-undef) negated element must itself be a legal
    // immediate for this type.
    return llvm::all_of(Op->op_values(), [&](SDValue N) {
      return N.isUndef() ||
             isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                          ForCodeSize);
    });
  }
  case ISD::FADD:
    // -(A + B) = (-A) - B is only valid when signed zeros don't matter.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT))
      return 0;

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                                    ForCodeSize, Depth + 1))
      return V;
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
                              ForCodeSize, Depth + 1);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                                    ForCodeSize, Depth + 1))
      return V;

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        return 0;

    return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
                              ForCodeSize, Depth + 1);

  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    // The addend Z must be negatible in either case.
    char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    if (!V2)
      return 0;

    // One of Op0/Op1 must be cheaply negatible, then select the cheapest.
    char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    char V01 = std::max(V0, V1);
    return V01 ? std::max(V01, V2) : 0;
  }

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    // Negation commutes with these ops; cost is that of negating the operand.
    return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                              ForCodeSize, Depth + 1);
  }

  return 0;
}
5529 
/// Build the negated form of \p Op, assuming isNegatibleForFree() already
/// reported it as negatible with the same flags/depth — the two functions
/// must agree (asserted below), otherwise this hits llvm_unreachable on an
/// unhandled opcode.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOperations,
                                             bool ForCodeSize,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG)
    return Op.getOperand(0);

  assert(Depth <= SelectionDAG::MaxRecursionDepth &&
         "getNegatedExpression doesn't match isNegatibleForFree");
  const SDNodeFlags Flags = Op->getFlags();

  switch (Op.getOpcode()) {
  case ISD::ConstantFP: {
    // Negate the constant directly.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::BUILD_VECTOR: {
    // Negate each (non-undef) constant element.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
    }
    return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
  }
  case ISD::FADD:
    assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
            Flags.hasNoSignedZeros()) &&
           "Expected NSZ fp-flag");

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
                           Depth + 1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         getNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth + 1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       getNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    // (AllowUndefs: an undef splat lane still counts as zero here.)
    if (ConstantFPSDNode *N0CFP =
            isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
                           Depth + 1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         getNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth + 1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(
        Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0),
        getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
                             ForCodeSize, Depth + 1),
        Flags);

  case ISD::FMA:
  case ISD::FMAD: {
    assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
            Flags.hasNoSignedZeros()) &&
           "Expected NSZ fp-flag");

    // The addend is always negated; pick whichever multiplicand is cheaper
    // to negate for the other operand (mirrors isNegatibleForFree).
    SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
                                        ForCodeSize, Depth + 1);

    char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    if (V0 >= V1) {
      // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
      SDValue Neg0 = getNegatedExpression(
          Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
                         Op.getOperand(1), Neg2, Flags);
    }

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
                                        ForCodeSize, Depth + 1);
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0), Neg1, Neg2, Flags);
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes with the op: negate the operand instead.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       getNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1));
  case ISD::FP_ROUND:
    // FP_ROUND carries a truncation flag as operand 1 that must be preserved.
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       getNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1),
                       Op.getOperand(1));
  }

  llvm_unreachable("Unknown code");
}
5653 
5654 //===----------------------------------------------------------------------===//
5655 // Legalization Utilities
5656 //===----------------------------------------------------------------------===//
5657 
5658 bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
5659                                     SDValue LHS, SDValue RHS,
5660                                     SmallVectorImpl<SDValue> &Result,
5661                                     EVT HiLoVT, SelectionDAG &DAG,
5662                                     MulExpansionKind Kind, SDValue LL,
5663                                     SDValue LH, SDValue RL, SDValue RH) const {
5664   assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
5665          Opcode == ISD::SMUL_LOHI);
5666 
5667   bool HasMULHS = (Kind == MulExpansionKind::Always) ||
5668                   isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
5669   bool HasMULHU = (Kind == MulExpansionKind::Always) ||
5670                   isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
5671   bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
5672                       isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
5673   bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
5674                       isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
5675 
5676   if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
5677     return false;
5678 
5679   unsigned OuterBitSize = VT.getScalarSizeInBits();
5680   unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
5681   unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
5682   unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
5683 
5684   // LL, LH, RL, and RH must be either all NULL or all set to a value.
5685   assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
5686          (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
5687 
5688   SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
5689   auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
5690                           bool Signed) -> bool {
5691     if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
5692       Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
5693       Hi = SDValue(Lo.getNode(), 1);
5694       return true;
5695     }
5696     if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
5697       Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
5698       Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
5699       return true;
5700     }
5701     return false;
5702   };
5703 
5704   SDValue Lo, Hi;
5705 
5706   if (!LL.getNode() && !RL.getNode() &&
5707       isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
5708     LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
5709     RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
5710   }
5711 
5712   if (!LL.getNode())
5713     return false;
5714 
5715   APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
5716   if (DAG.MaskedValueIsZero(LHS, HighMask) &&
5717       DAG.MaskedValueIsZero(RHS, HighMask)) {
5718     // The inputs are both zero-extended.
5719     if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
5720       Result.push_back(Lo);
5721       Result.push_back(Hi);
5722       if (Opcode != ISD::MUL) {
5723         SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
5724         Result.push_back(Zero);
5725         Result.push_back(Zero);
5726       }
5727       return true;
5728     }
5729   }
5730 
5731   if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
5732       RHSSB > InnerBitSize) {
5733     // The input values are both sign-extended.
5734     // TODO non-MUL case?
5735     if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
5736       Result.push_back(Lo);
5737       Result.push_back(Hi);
5738       return true;
5739     }
5740   }
5741 
5742   unsigned ShiftAmount = OuterBitSize - InnerBitSize;
5743   EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
5744   if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
5745     // FIXME getShiftAmountTy does not always return a sensible result when VT
5746     // is an illegal type, and so the type may be too small to fit the shift
5747     // amount. Override it with i32. The shift will have to be legalized.
5748     ShiftAmountTy = MVT::i32;
5749   }
5750   SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
5751 
5752   if (!LH.getNode() && !RH.getNode() &&
5753       isOperationLegalOrCustom(ISD::SRL, VT) &&
5754       isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
5755     LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
5756     LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
5757     RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
5758     RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
5759   }
5760 
5761   if (!LH.getNode())
5762     return false;
5763 
5764   if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
5765     return false;
5766 
5767   Result.push_back(Lo);
5768 
5769   if (Opcode == ISD::MUL) {
5770     RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
5771     LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
5772     Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
5773     Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
5774     Result.push_back(Hi);
5775     return true;
5776   }
5777 
5778   // Compute the full width result.
5779   auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
5780     Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
5781     Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
5782     Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
5783     return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
5784   };
5785 
5786   SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
5787   if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
5788     return false;
5789 
5790   // This is effectively the add part of a multiply-add of half-sized operands,
5791   // so it cannot overflow.
5792   Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
5793 
5794   if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
5795     return false;
5796 
5797   SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
5798   EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5799 
5800   bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
5801                   isOperationLegalOrCustom(ISD::ADDE, VT));
5802   if (UseGlue)
5803     Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
5804                        Merge(Lo, Hi));
5805   else
5806     Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
5807                        Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
5808 
5809   SDValue Carry = Next.getValue(1);
5810   Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5811   Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
5812 
5813   if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
5814     return false;
5815 
5816   if (UseGlue)
5817     Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
5818                      Carry);
5819   else
5820     Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
5821                      Zero, Carry);
5822 
5823   Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
5824 
5825   if (Opcode == ISD::SMUL_LOHI) {
5826     SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
5827                                   DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
5828     Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
5829 
5830     NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
5831                           DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
5832     Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
5833   }
5834 
5835   Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5836   Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
5837   Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5838   return true;
5839 }
5840 
5841 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
5842                                SelectionDAG &DAG, MulExpansionKind Kind,
5843                                SDValue LL, SDValue LH, SDValue RL,
5844                                SDValue RH) const {
5845   SmallVector<SDValue, 2> Result;
5846   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
5847                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
5848                            DAG, Kind, LL, LH, RL, RH);
5849   if (Ok) {
5850     assert(Result.size() == 2);
5851     Lo = Result[0];
5852     Hi = Result[1];
5853   }
5854   return Ok;
5855 }
5856 
5857 bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
5858                                        SelectionDAG &DAG) const {
5859   EVT VT = Node->getValueType(0);
5860 
5861   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
5862                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
5863                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
5864                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
5865     return false;
5866 
5867   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
5868   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
5869   SDValue X = Node->getOperand(0);
5870   SDValue Y = Node->getOperand(1);
5871   SDValue Z = Node->getOperand(2);
5872 
5873   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5874   bool IsFSHL = Node->getOpcode() == ISD::FSHL;
5875   SDLoc DL(SDValue(Node, 0));
5876 
5877   EVT ShVT = Z.getValueType();
5878   SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
5879   SDValue Zero = DAG.getConstant(0, DL, ShVT);
5880 
5881   SDValue ShAmt;
5882   if (isPowerOf2_32(EltSizeInBits)) {
5883     SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
5884     ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
5885   } else {
5886     ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
5887   }
5888 
5889   SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
5890   SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
5891   SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
5892   SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
5893 
5894   // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
5895   // and that is undefined. We must compare and select to avoid UB.
5896   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
5897 
5898   // For fshl, 0-shift returns the 1st arg (X).
5899   // For fshr, 0-shift returns the 2nd arg (Y).
5900   SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
5901   Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
5902   return true;
5903 }
5904 
5905 // TODO: Merge with expandFunnelShift.
5906 bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
5907                                SelectionDAG &DAG) const {
5908   EVT VT = Node->getValueType(0);
5909   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5910   bool IsLeft = Node->getOpcode() == ISD::ROTL;
5911   SDValue Op0 = Node->getOperand(0);
5912   SDValue Op1 = Node->getOperand(1);
5913   SDLoc DL(SDValue(Node, 0));
5914 
5915   EVT ShVT = Op1.getValueType();
5916   SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
5917 
5918   // If a rotate in the other direction is legal, use it.
5919   unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
5920   if (isOperationLegal(RevRot, VT)) {
5921     SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
5922     Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
5923     return true;
5924   }
5925 
5926   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
5927                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
5928                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
5929                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
5930                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
5931     return false;
5932 
5933   // Otherwise,
5934   //   (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
5935   //   (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
5936   //
5937   assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
5938          "Expecting the type bitwidth to be a power of 2");
5939   unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
5940   unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
5941   SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
5942   SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
5943   SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
5944   SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
5945   Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
5946                        DAG.getNode(HsOpc, DL, VT, Op0, And1));
5947   return true;
5948 }
5949 
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  // Expand FP_TO_SINT by decomposing the float's bit pattern with integer
  // arithmetic (sign, exponent, mantissa). Returns false if this expansion
  // does not apply; the caller must then handle the node some other way.
  // Strict FP nodes carry a chain as operand 0, so the FP source shifts by 1.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE single-precision field masks/positions: 8 exponent bits at bit 23,
  // bias 127, 23 mantissa bits.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as its raw integer bits.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Extract the exponent field and remove the bias.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Arithmetic-shift the sign bit down through the whole word: Sign becomes
  // 0 for non-negative inputs and all-ones for negative inputs, then is
  // sign-extended to the destination width.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Recover the significand: mantissa bits plus the implicit leading 1
  // (bit 23). NOTE(review): this assumes a normal number; denormals have
  // exponent < 0 below and fold to 0 in the final select.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand by 2^(Exponent - 23): shift left when the exponent
  // exceeds the mantissa width, otherwise shift right.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Conditionally negate: (R ^ Sign) - Sign is R when Sign == 0 and -R when
  // Sign == -1 (two's complement).
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |Src| < 1, so the truncated integer
  // result is 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
6020 
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  // Expand FP_TO_UINT in terms of FP_TO_SINT: values below the destination
  // sign bit convert directly; larger values are biased down by 2^(N-1)
  // before converting and the sign bit is restored afterwards. For strict FP
  // nodes the output Chain is updated to the last chained operation.
  SDLoc dl(SDValue(Node, 0));
  // Strict FP nodes carry a chain as operand 0, so the FP source shifts by 1.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  // convertFromAPInt leaves APF holding 2^(N-1) (the destination signmask)
  // as a float; on overflow that value is unreachable, so FP_TO_SINT
  // covers the whole input range.
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Sel is true when Src is strictly below 2^(N-1), i.e. in signed range.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // Val = select Sel, Src, Src - 0x8000000000000000
    // Ofs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Val) ^ Ofs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue SrcBiased;
    if (Node->isStrictFPOpcode())
      SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                              { Node->getOperand(0), Src, Cst });
    else
      SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst);
    SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased);
    SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
                                DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { SrcBiased.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    // XOR with the signmask re-adds 2^(N-1) to the biased conversion.
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
6105 
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  // Expand UINT_TO_FP for i64 sources without a native unsigned conversion.
  // Two strategies: for f32, fall back to SINT_TO_FP with a halving trick for
  // values with the sign bit set; for f64, assemble the result from two
  // exactly-representable 32-bit halves. Returns false for any other types.
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  if (SrcVT.getScalarType() != MVT::i64)
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  if (DstVT.getScalarType() == MVT::f32) {
    // Only expand vector types if we have the appropriate vector bit
    // operations.
    if (SrcVT.isVector() &&
        (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
         !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
         !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
      return false;

    // For unsigned conversions, convert them to signed conversions using the
    // algorithm from the x86_64 __floatundidf in compiler_rt.
    // NOTE(review): for the f32 destination the matching compiler-rt builtin
    // is __floatundisf — confirm the intended reference.
    SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);

    // Slow path (sign bit set): halve the value with (Src >> 1) | (Src & 1)
    // — the OR of the low bit keeps rounding correct — convert as signed,
    // then double the result.
    SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
    SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
    SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
    SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
    SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);

    SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
    SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);

    // TODO: This really should be implemented using a branch rather than a
    // select.  We happen to get lucky and machinesink does the right
    // thing most of the time.  This would be a good candidate for a
    // pseudo-op, or, even better, for whole-function isel.
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

    // Signed < 0 is equivalent to "top bit set" for the unsigned input.
    SDValue SignBitTest = DAG.getSetCC(
        dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
    Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
    return true;
  }

  if (DstVT.getScalarType() == MVT::f64) {
    // Only expand vector types if we have the appropriate vector bit
    // operations.
    if (SrcVT.isVector() &&
        (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
         !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
         !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
      return false;

    // Implementation of unsigned i64 to f64 following the algorithm in
    // __floatundidf in compiler_rt. This implementation has the advantage
    // of performing rounding correctly, both in the default rounding mode
    // and in all alternate rounding modes.
    // ORing the 32-bit halves into the mantissas of 2^52 and 2^84 makes each
    // half an exact f64; the final FSUB removes both exponent offsets and the
    // FADD combines the halves with a single correctly-rounded operation.
    SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
    SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
        BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
    SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
    SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
    SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

    SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
    SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
    SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
    SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
    SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
    SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
    SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
    Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
    return true;
  }

  return false;
}
6190 
6191 SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
6192                                               SelectionDAG &DAG) const {
6193   SDLoc dl(Node);
6194   unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
6195     ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
6196   EVT VT = Node->getValueType(0);
6197   if (isOperationLegalOrCustom(NewOp, VT)) {
6198     SDValue Quiet0 = Node->getOperand(0);
6199     SDValue Quiet1 = Node->getOperand(1);
6200 
6201     if (!Node->getFlags().hasNoNaNs()) {
6202       // Insert canonicalizes if it's possible we need to quiet to get correct
6203       // sNaN behavior.
6204       if (!DAG.isKnownNeverSNaN(Quiet0)) {
6205         Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
6206                              Node->getFlags());
6207       }
6208       if (!DAG.isKnownNeverSNaN(Quiet1)) {
6209         Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
6210                              Node->getFlags());
6211       }
6212     }
6213 
6214     return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
6215   }
6216 
6217   // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
6218   // instead if there are no NaNs.
6219   if (Node->getFlags().hasNoNaNs()) {
6220     unsigned IEEE2018Op =
6221         Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
6222     if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
6223       return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
6224                          Node->getOperand(1), Node->getFlags());
6225     }
6226   }
6227 
6228   return SDValue();
6229 }
6230 
bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
                                 SelectionDAG &DAG) const {
  // Expand ISD::CTPOP using the parallel-bit-summing technique. Returns
  // false when the type can't be handled (irregular width, or a vector type
  // lacking the component operations).
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  // The MUL is only needed for widths > 8 (see the final step below).
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return false;

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  // Each mask is a per-byte pattern splatted across the full width.
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
  SDValue Mask01 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);
  // v = (v * 0x01010101...) >> (Len - 8)
  // The multiply sums all the per-byte counts into the top byte; for i8
  // that byte sum is already the result, so the step is skipped.
  if (Len > 8)
    Op =
        DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
                    DAG.getConstant(Len - 8, dl, ShVT));

  Result = Op;
  return true;
}
6290 
6291 bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
6292                                 SelectionDAG &DAG) const {
6293   SDLoc dl(Node);
6294   EVT VT = Node->getValueType(0);
6295   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6296   SDValue Op = Node->getOperand(0);
6297   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6298 
6299   // If the non-ZERO_UNDEF version is supported we can use that instead.
6300   if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
6301       isOperationLegalOrCustom(ISD::CTLZ, VT)) {
6302     Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
6303     return true;
6304   }
6305 
6306   // If the ZERO_UNDEF version is supported use that and handle the zero case.
6307   if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
6308     EVT SetCCVT =
6309         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6310     SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
6311     SDValue Zero = DAG.getConstant(0, dl, VT);
6312     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6313     Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6314                          DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
6315     return true;
6316   }
6317 
6318   // Only expand vector types if we have the appropriate vector bit operations.
6319   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6320                         !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
6321                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
6322                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
6323     return false;
6324 
6325   // for now, we do this:
6326   // x = x | (x >> 1);
6327   // x = x | (x >> 2);
6328   // ...
6329   // x = x | (x >>16);
6330   // x = x | (x >>32); // for 64-bit input
6331   // return popcount(~x);
6332   //
6333   // Ref: "Hacker's Delight" by Henry Warren
6334   for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
6335     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
6336     Op = DAG.getNode(ISD::OR, dl, VT, Op,
6337                      DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
6338   }
6339   Op = DAG.getNOT(dl, Op, VT);
6340   Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
6341   return true;
6342 }
6343 
6344 bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
6345                                 SelectionDAG &DAG) const {
6346   SDLoc dl(Node);
6347   EVT VT = Node->getValueType(0);
6348   SDValue Op = Node->getOperand(0);
6349   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6350 
6351   // If the non-ZERO_UNDEF version is supported we can use that instead.
6352   if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
6353       isOperationLegalOrCustom(ISD::CTTZ, VT)) {
6354     Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
6355     return true;
6356   }
6357 
6358   // If the ZERO_UNDEF version is supported use that and handle the zero case.
6359   if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
6360     EVT SetCCVT =
6361         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6362     SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
6363     SDValue Zero = DAG.getConstant(0, dl, VT);
6364     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6365     Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6366                          DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
6367     return true;
6368   }
6369 
6370   // Only expand vector types if we have the appropriate vector bit operations.
6371   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6372                         (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
6373                          !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
6374                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
6375                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
6376                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6377     return false;
6378 
6379   // for now, we use: { return popcount(~x & (x - 1)); }
6380   // unless the target has ctlz but not ctpop, in which case we use:
6381   // { return 32 - nlz(~x & (x-1)); }
6382   // Ref: "Hacker's Delight" by Henry Warren
6383   SDValue Tmp = DAG.getNode(
6384       ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
6385       DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
6386 
6387   // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
6388   if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
6389     Result =
6390         DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
6391                     DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
6392     return true;
6393   }
6394 
6395   Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
6396   return true;
6397 }
6398 
6399 bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
6400                                SelectionDAG &DAG) const {
6401   SDLoc dl(N);
6402   EVT VT = N->getValueType(0);
6403   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6404   SDValue Op = N->getOperand(0);
6405 
6406   // Only expand vector types if we have the appropriate vector operations.
6407   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
6408                         !isOperationLegalOrCustom(ISD::ADD, VT) ||
6409                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6410     return false;
6411 
6412   SDValue Shift =
6413       DAG.getNode(ISD::SRA, dl, VT, Op,
6414                   DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
6415   SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
6416   Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
6417   return true;
6418 }
6419 
6420 SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
6421                                             SelectionDAG &DAG) const {
6422   SDLoc SL(LD);
6423   SDValue Chain = LD->getChain();
6424   SDValue BasePTR = LD->getBasePtr();
6425   EVT SrcVT = LD->getMemoryVT();
6426   ISD::LoadExtType ExtType = LD->getExtensionType();
6427 
6428   unsigned NumElem = SrcVT.getVectorNumElements();
6429 
6430   EVT SrcEltVT = SrcVT.getScalarType();
6431   EVT DstEltVT = LD->getValueType(0).getScalarType();
6432 
6433   unsigned Stride = SrcEltVT.getSizeInBits() / 8;
6434   assert(SrcEltVT.isByteSized());
6435 
6436   SmallVector<SDValue, 8> Vals;
6437   SmallVector<SDValue, 8> LoadChains;
6438 
6439   for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
6440     SDValue ScalarLoad =
6441         DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
6442                        LD->getPointerInfo().getWithOffset(Idx * Stride),
6443                        SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
6444                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
6445 
6446     BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
6447 
6448     Vals.push_back(ScalarLoad.getValue(0));
6449     LoadChains.push_back(ScalarLoad.getValue(1));
6450   }
6451 
6452   SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
6453   SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
6454 
6455   return DAG.getMergeValues({Value, NewChain}, SL);
6456 }
6457 
/// Scalarize a vector store, \p ST, into one store per vector element.
/// Vectors whose element type is not byte-sized cannot be split into
/// individually addressable stores; those are instead packed into a single
/// integer of the vector's total bit width and stored as a whole.
/// Returns the chain result: a TokenFactor of the per-element stores, or
/// the single integer store.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    // OR each truncated, zero-extended element into its bit position within
    // the accumulated integer.
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getConstant(Idx, SL, IdxVT));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 occupies the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getAlignment(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getConstant(Idx, SL, IdxVT));

    SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
        ST->getMemOperand()->getFlags(), ST->getAAInfo());

    Stores.push_back(Store);
  }

  // The stores are independent of each other; tie them with a TokenFactor.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
6531 
/// Expand a load that the target cannot perform at its given alignment.
/// Returns the pair {loaded value, new chain}.
///
/// FP/vector loads are handled by one of: scalarizing a vector load,
/// re-expressing the load as a (misaligned) integer load plus a bitcast, or
/// copying the bytes through an aligned stack slot with register-sized
/// integer loads/stores. Plain integer loads are assembled from two
/// half-width loads combined with shift+or.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
        if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
          return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
        return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-sized chunks needed to cover the loaded bytes,
    // rounding up; the last chunk may be partial.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       MinAlign(LD->getAlignment(), Offset),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  unsigned Alignment = LD->getAlignment();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    // Little-endian: low half at the base address, high half above it.
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  } else {
    // Big-endian: high half comes first in memory.
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  }

  // aggregate the two parts: Result = (Hi << NumBits) | Lo.
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
6686 
6687 SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
6688                                              SelectionDAG &DAG) const {
6689   assert(ST->getAddressingMode() == ISD::UNINDEXED &&
6690          "unaligned indexed stores not implemented!");
6691   SDValue Chain = ST->getChain();
6692   SDValue Ptr = ST->getBasePtr();
6693   SDValue Val = ST->getValue();
6694   EVT VT = Val.getValueType();
6695   int Alignment = ST->getAlignment();
6696   auto &MF = DAG.getMachineFunction();
6697   EVT StoreMemVT = ST->getMemoryVT();
6698 
6699   SDLoc dl(ST);
6700   if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
6701     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
6702     if (isTypeLegal(intVT)) {
6703       if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
6704           StoreMemVT.isVector()) {
6705         // Scalarize the store and let the individual components be handled.
6706         SDValue Result = scalarizeVectorStore(ST, DAG);
6707         return Result;
6708       }
6709       // Expand to a bitconvert of the value to the integer type of the
6710       // same size, then a (misaligned) int store.
6711       // FIXME: Does not handle truncating floating point stores!
6712       SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
6713       Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
6714                             Alignment, ST->getMemOperand()->getFlags());
6715       return Result;
6716     }
6717     // Do a (aligned) store to a stack slot, then copy from the stack slot
6718     // to the final destination using (unaligned) integer loads and stores.
6719     MVT RegVT = getRegisterType(
6720         *DAG.getContext(),
6721         EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
6722     EVT PtrVT = Ptr.getValueType();
6723     unsigned StoredBytes = StoreMemVT.getStoreSize();
6724     unsigned RegBytes = RegVT.getSizeInBits() / 8;
6725     unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
6726 
6727     // Make sure the stack slot is also aligned for the register type.
6728     SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
6729     auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6730 
6731     // Perform the original store, only redirected to the stack slot.
6732     SDValue Store = DAG.getTruncStore(
6733         Chain, dl, Val, StackPtr,
6734         MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
6735 
6736     EVT StackPtrVT = StackPtr.getValueType();
6737 
6738     SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
6739     SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
6740     SmallVector<SDValue, 8> Stores;
6741     unsigned Offset = 0;
6742 
6743     // Do all but one copies using the full register width.
6744     for (unsigned i = 1; i < NumRegs; i++) {
6745       // Load one integer register's worth from the stack slot.
6746       SDValue Load = DAG.getLoad(
6747           RegVT, dl, Store, StackPtr,
6748           MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
6749       // Store it to the final location.  Remember the store.
6750       Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
6751                                     ST->getPointerInfo().getWithOffset(Offset),
6752                                     MinAlign(ST->getAlignment(), Offset),
6753                                     ST->getMemOperand()->getFlags()));
6754       // Increment the pointers.
6755       Offset += RegBytes;
6756       StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
6757       Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
6758     }
6759 
6760     // The last store may be partial.  Do a truncating store.  On big-endian
6761     // machines this requires an extending load from the stack slot to ensure
6762     // that the bits are in the right place.
6763     EVT LoadMemVT =
6764         EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
6765 
6766     // Load from the stack slot.
6767     SDValue Load = DAG.getExtLoad(
6768         ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
6769         MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
6770 
6771     Stores.push_back(
6772         DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
6773                           ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
6774                           MinAlign(ST->getAlignment(), Offset),
6775                           ST->getMemOperand()->getFlags(), ST->getAAInfo()));
6776     // The order of the stores doesn't matter - say it with a TokenFactor.
6777     SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
6778     return Result;
6779   }
6780 
6781   assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
6782          "Unaligned store of unknown type.");
6783   // Get the half-size VT
6784   EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
6785   int NumBits = NewStoredVT.getSizeInBits();
6786   int IncrementSize = NumBits / 8;
6787 
6788   // Divide the stored value in two parts.
6789   SDValue ShiftAmount = DAG.getConstant(
6790       NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
6791   SDValue Lo = Val;
6792   SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
6793 
6794   // Store the two parts
6795   SDValue Store1, Store2;
6796   Store1 = DAG.getTruncStore(Chain, dl,
6797                              DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
6798                              Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
6799                              ST->getMemOperand()->getFlags());
6800 
6801   Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
6802   Alignment = MinAlign(Alignment, IncrementSize);
6803   Store2 = DAG.getTruncStore(
6804       Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
6805       ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
6806       ST->getMemOperand()->getFlags(), ST->getAAInfo());
6807 
6808   SDValue Result =
6809       DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
6810   return Result;
6811 }
6812 
6813 SDValue
6814 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
6815                                        const SDLoc &DL, EVT DataVT,
6816                                        SelectionDAG &DAG,
6817                                        bool IsCompressedMemory) const {
6818   SDValue Increment;
6819   EVT AddrVT = Addr.getValueType();
6820   EVT MaskVT = Mask.getValueType();
6821   assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
6822          "Incompatible types of Data and Mask");
6823   if (IsCompressedMemory) {
6824     // Incrementing the pointer according to number of '1's in the mask.
6825     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
6826     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
6827     if (MaskIntVT.getSizeInBits() < 32) {
6828       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
6829       MaskIntVT = MVT::i32;
6830     }
6831 
6832     // Count '1's with POPCNT.
6833     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
6834     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
6835     // Scale is an element size in bytes.
6836     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
6837                                     AddrVT);
6838     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
6839   } else
6840     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
6841 
6842   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
6843 }
6844 
6845 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
6846                                        SDValue Idx,
6847                                        EVT VecVT,
6848                                        const SDLoc &dl) {
6849   if (isa<ConstantSDNode>(Idx))
6850     return Idx;
6851 
6852   EVT IdxVT = Idx.getValueType();
6853   unsigned NElts = VecVT.getVectorNumElements();
6854   if (isPowerOf2_32(NElts)) {
6855     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
6856                                      Log2_32(NElts));
6857     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
6858                        DAG.getConstant(Imm, dl, IdxVT));
6859   }
6860 
6861   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
6862                      DAG.getConstant(NElts - 1, dl, IdxVT));
6863 }
6864 
6865 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
6866                                                 SDValue VecPtr, EVT VecVT,
6867                                                 SDValue Index) const {
6868   SDLoc dl(Index);
6869   // Make sure the index type is big enough to compute in.
6870   Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
6871 
6872   EVT EltVT = VecVT.getVectorElementType();
6873 
6874   // Calculate the element offset and add it to the pointer.
6875   unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
6876   assert(EltSize * 8 == EltVT.getSizeInBits() &&
6877          "Converting bits to bytes lost precision");
6878 
6879   Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
6880 
6881   EVT IdxVT = Index.getValueType();
6882 
6883   Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
6884                       DAG.getConstant(EltSize, dl, IdxVT));
6885   return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
6886 }
6887 
6888 //===----------------------------------------------------------------------===//
6889 // Implementation of Emulated TLS Model
6890 //===----------------------------------------------------------------------===//
6891 
/// Lower an access to a thread-local global under the emulated TLS model.
/// Returns the address produced by the runtime call.
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  ArgListTy Args;
  ArgListEntry Entry;
  // Look up the "__emutls_v.<name>" control variable for this global in its
  // module; it is expected to have been created before codegen.
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Emit the call to __emutls_get_address with the control variable's
  // address as the single argument.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
6928 
6929 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
6930                                                 SelectionDAG &DAG) const {
6931   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
6932   if (!isCtlzFast())
6933     return SDValue();
6934   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6935   SDLoc dl(Op);
6936   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6937     if (C->isNullValue() && CC == ISD::SETEQ) {
6938       EVT VT = Op.getOperand(0).getValueType();
6939       SDValue Zext = Op.getOperand(0);
6940       if (VT.bitsLT(MVT::i32)) {
6941         VT = MVT::i32;
6942         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
6943       }
6944       unsigned Log2b = Log2_32(VT.getSizeInBits());
6945       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
6946       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
6947                                 DAG.getConstant(Log2b, dl, MVT::i32));
6948       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
6949     }
6950   }
6951   return SDValue();
6952 }
6953 
/// Expand ISD::[US](ADD|SUB)SAT into target-legal operations.
/// Cheap umax/umin based forms are tried first for the unsigned cases;
/// otherwise the node is lowered via the matching overflow-reporting
/// arithmetic node plus selects that clamp the result on overflow.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map each saturating opcode to the overflow-reporting node used to
  // detect when clamping is required.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
                               LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    // When booleans are all-ones, the overflow flag sign-extends directly
    // into the saturation mask, avoiding a select.
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  } else if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  } else {
    // Signed cases: on overflow, saturate toward the sign opposite the raw
    // result's sign.
    // SatMax -> Overflow && SumDiff < 0
    // SatMin -> Overflow && SumDiff >= 0
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
    SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
    SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
    SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
    Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
    return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
  }
}
7033 
7034 SDValue
7035 TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
7036   assert((Node->getOpcode() == ISD::SMULFIX ||
7037           Node->getOpcode() == ISD::UMULFIX ||
7038           Node->getOpcode() == ISD::SMULFIXSAT ||
7039           Node->getOpcode() == ISD::UMULFIXSAT) &&
7040          "Expected a fixed point multiplication opcode");
7041 
7042   SDLoc dl(Node);
7043   SDValue LHS = Node->getOperand(0);
7044   SDValue RHS = Node->getOperand(1);
7045   EVT VT = LHS.getValueType();
7046   unsigned Scale = Node->getConstantOperandVal(2);
7047   bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
7048                      Node->getOpcode() == ISD::UMULFIXSAT);
7049   bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
7050                  Node->getOpcode() == ISD::SMULFIXSAT);
7051   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7052   unsigned VTSize = VT.getScalarSizeInBits();
7053 
7054   if (!Scale) {
7055     // [us]mul.fix(a, b, 0) -> mul(a, b)
7056     if (!Saturating) {
7057       if (isOperationLegalOrCustom(ISD::MUL, VT))
7058         return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
7059     } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
7060       SDValue Result =
7061           DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
7062       SDValue Product = Result.getValue(0);
7063       SDValue Overflow = Result.getValue(1);
7064       SDValue Zero = DAG.getConstant(0, dl, VT);
7065 
7066       APInt MinVal = APInt::getSignedMinValue(VTSize);
7067       APInt MaxVal = APInt::getSignedMaxValue(VTSize);
7068       SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
7069       SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
7070       SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
7071       Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
7072       return DAG.getSelect(dl, VT, Overflow, Result, Product);
7073     } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
7074       SDValue Result =
7075           DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
7076       SDValue Product = Result.getValue(0);
7077       SDValue Overflow = Result.getValue(1);
7078 
7079       APInt MaxVal = APInt::getMaxValue(VTSize);
7080       SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
7081       return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
7082     }
7083   }
7084 
7085   assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
7086          "Expected scale to be less than the number of bits if signed or at "
7087          "most the number of bits if unsigned.");
7088   assert(LHS.getValueType() == RHS.getValueType() &&
7089          "Expected both operands to be the same type");
7090 
7091   // Get the upper and lower bits of the result.
7092   SDValue Lo, Hi;
7093   unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
7094   unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
7095   if (isOperationLegalOrCustom(LoHiOp, VT)) {
7096     SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
7097     Lo = Result.getValue(0);
7098     Hi = Result.getValue(1);
7099   } else if (isOperationLegalOrCustom(HiOp, VT)) {
7100     Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
7101     Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
7102   } else if (VT.isVector()) {
7103     return SDValue();
7104   } else {
7105     report_fatal_error("Unable to expand fixed point multiplication.");
7106   }
7107 
7108   if (Scale == VTSize)
7109     // Result is just the top half since we'd be shifting by the width of the
7110     // operand. Overflow impossible so this works for both UMULFIX and
7111     // UMULFIXSAT.
7112     return Hi;
7113 
7114   // The result will need to be shifted right by the scale since both operands
7115   // are scaled. The result is given to us in 2 halves, so we only want part of
7116   // both in the result.
7117   EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
7118   SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
7119                                DAG.getConstant(Scale, dl, ShiftTy));
7120   if (!Saturating)
7121     return Result;
7122 
7123   if (!Signed) {
7124     // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
7125     // widened multiplication) aren't all zeroes.
7126 
7127     // Saturate to max if ((Hi >> Scale) != 0),
7128     // which is the same as if (Hi > ((1 << Scale) - 1))
7129     APInt MaxVal = APInt::getMaxValue(VTSize);
7130     SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
7131                                       dl, VT);
7132     Result = DAG.getSelectCC(dl, Hi, LowMask,
7133                              DAG.getConstant(MaxVal, dl, VT), Result,
7134                              ISD::SETUGT);
7135 
7136     return Result;
7137   }
7138 
7139   // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
7140   // widened multiplication) aren't all ones or all zeroes.
7141 
7142   SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
7143   SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
7144 
7145   if (Scale == 0) {
7146     SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
7147                                DAG.getConstant(VTSize - 1, dl, ShiftTy));
7148     SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
7149     // Saturated to SatMin if wide product is negative, and SatMax if wide
7150     // product is positive ...
7151     SDValue Zero = DAG.getConstant(0, dl, VT);
7152     SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
7153                                                ISD::SETLT);
7154     // ... but only if we overflowed.
7155     return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
7156   }
7157 
  // We handled Scale == 0 above so all the bits to examine are in Hi.
7159 
7160   // Saturate to max if ((Hi >> (Scale - 1)) > 0),
7161   // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
7162   SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
7163                                     dl, VT);
7164   Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1,
  // which is the same as if (Hi < (-1 << (Scale - 1)))
7167   SDValue HighMask =
7168       DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
7169                       dl, VT);
7170   Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
7171   return Result;
7172 }
7173 
7174 void TargetLowering::expandUADDSUBO(
7175     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
7176   SDLoc dl(Node);
7177   SDValue LHS = Node->getOperand(0);
7178   SDValue RHS = Node->getOperand(1);
7179   bool IsAdd = Node->getOpcode() == ISD::UADDO;
7180 
7181   // If ADD/SUBCARRY is legal, use that instead.
7182   unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
7183   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
7184     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
7185     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
7186                                     { LHS, RHS, CarryIn });
7187     Result = SDValue(NodeCarry.getNode(), 0);
7188     Overflow = SDValue(NodeCarry.getNode(), 1);
7189     return;
7190   }
7191 
7192   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
7193                             LHS.getValueType(), LHS, RHS);
7194 
7195   EVT ResultType = Node->getValueType(1);
7196   EVT SetCCType = getSetCCResultType(
7197       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
7198   ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
7199   SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
7200   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
7201 }
7202 
7203 void TargetLowering::expandSADDSUBO(
7204     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
7205   SDLoc dl(Node);
7206   SDValue LHS = Node->getOperand(0);
7207   SDValue RHS = Node->getOperand(1);
7208   bool IsAdd = Node->getOpcode() == ISD::SADDO;
7209 
7210   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
7211                             LHS.getValueType(), LHS, RHS);
7212 
7213   EVT ResultType = Node->getValueType(1);
7214   EVT OType = getSetCCResultType(
7215       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
7216 
7217   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
7218   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
7219   if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
7220     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
7221     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
7222     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
7223     return;
7224   }
7225 
7226   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
7227 
7228   // For an addition, the result should be less than one of the operands (LHS)
7229   // if and only if the other operand (RHS) is negative, otherwise there will
7230   // be overflow.
7231   // For a subtraction, the result should be less than one of the operands
7232   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
7233   // otherwise there will be overflow.
7234   SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
7235   SDValue ConditionRHS =
7236       DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
7237 
7238   Overflow = DAG.getBoolExtOrTrunc(
7239       DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
7240       ResultType, ResultType);
7241 }
7242 
/// Expand an [SU]MULO node into a multiply plus an explicit overflow check.
/// On success, \p Result holds the low half of the product and \p Overflow
/// the overflow flag (in the node's second result type). Returns false when
/// no expansion is possible (vectors with no legal strategy).
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    // i.e. shift left, then check that shifting back recovers the operand.
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // General case: compute the product at double width (directly or via
  // MULH*/[SU]MUL_LOHI) and inspect the top half for overflow.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                              VT.getVectorNumElements());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Per-signedness opcode table: { high-half mul, combined lo/hi mul, extend }.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    // Strategy 1: separate MUL (low half) + MULH[SU] (high half).
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    // Strategy 2: one [SU]MUL_LOHI producing both halves.
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Strategy 3: extend both operands to the wide type, multiply there, and
    // split the wide product back into halves by truncate/shift.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    // Strategy 4 (scalar only): wide-multiply libcall.
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of low
      // part.
      unsigned LoSize = VT.getSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
        HiLHS = DAG.getConstant(0, dl, VT);
        HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus has to be split to
    // two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half is not the sign-extension of the
    // bottom half.
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff the top half is nonzero.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
7386 
7387 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
7388   SDLoc dl(Node);
7389   bool NoNaN = Node->getFlags().hasNoNaNs();
7390   unsigned BaseOpcode = 0;
7391   switch (Node->getOpcode()) {
7392   default: llvm_unreachable("Expected VECREDUCE opcode");
7393   case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
7394   case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
7395   case ISD::VECREDUCE_ADD:  BaseOpcode = ISD::ADD; break;
7396   case ISD::VECREDUCE_MUL:  BaseOpcode = ISD::MUL; break;
7397   case ISD::VECREDUCE_AND:  BaseOpcode = ISD::AND; break;
7398   case ISD::VECREDUCE_OR:   BaseOpcode = ISD::OR; break;
7399   case ISD::VECREDUCE_XOR:  BaseOpcode = ISD::XOR; break;
7400   case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
7401   case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
7402   case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
7403   case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
7404   case ISD::VECREDUCE_FMAX:
7405     BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
7406     break;
7407   case ISD::VECREDUCE_FMIN:
7408     BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
7409     break;
7410   }
7411 
7412   SDValue Op = Node->getOperand(0);
7413   EVT VT = Op.getValueType();
7414 
7415   // Try to use a shuffle reduction for power of two vectors.
7416   if (VT.isPow2VectorType()) {
7417     while (VT.getVectorNumElements() > 1) {
7418       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
7419       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
7420         break;
7421 
7422       SDValue Lo, Hi;
7423       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
7424       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
7425       VT = HalfVT;
7426     }
7427   }
7428 
7429   EVT EltVT = VT.getVectorElementType();
7430   unsigned NumElts = VT.getVectorNumElements();
7431 
7432   SmallVector<SDValue, 8> Ops;
7433   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
7434 
7435   SDValue Res = Ops[0];
7436   for (unsigned i = 1; i < NumElts; i++)
7437     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
7438 
7439   // Result type may be wider than element type.
7440   if (EltVT != Node->getValueType(0))
7441     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
7442   return Res;
7443 }
7444