1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/CodeGen/CallingConvLower.h"
16 #include "llvm/CodeGen/MachineFrameInfo.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineJumpTableInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/SelectionDAG.h"
21 #include "llvm/CodeGen/TargetRegisterInfo.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalVariable.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCExpr.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/KnownBits.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetLoweringObjectFile.h"
33 #include "llvm/Target/TargetMachine.h"
34 #include <cctype>
35 using namespace llvm;
36 
/// NOTE: The TargetMachine owns TLOF.
/// Construct the target lowering object; all shared state is initialized by
/// the TargetLoweringBase constructor from the given TargetMachine.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
40 
/// Return a human-readable name for the given target-specific DAG node
/// opcode. The base implementation knows no target-specific nodes and always
/// returns nullptr; targets are expected to override this.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
44 
45 bool TargetLowering::isPositionIndependent() const {
46   return getTargetMachine().isPositionIndependent();
47 }
48 
49 /// Check whether a given call node is in tail position within its function. If
50 /// so, it sets Chain to the input chain of the tail call.
51 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
52                                           SDValue &Chain) const {
53   const Function &F = DAG.getMachineFunction().getFunction();
54 
55   // Conservatively require the attributes of the call to match those of
56   // the return. Ignore NoAlias and NonNull because they don't affect the
57   // call sequence.
58   AttributeList CallerAttrs = F.getAttributes();
59   if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
60           .removeAttribute(Attribute::NoAlias)
61           .removeAttribute(Attribute::NonNull)
62           .hasAttributes())
63     return false;
64 
65   // It's not safe to eliminate the sign / zero extension of the return value.
66   if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
67       CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
68     return false;
69 
70   // Check if the only use is a function return node.
71   return isUsedByReturnOnly(Node, Chain);
72 }
73 
74 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
75     const uint32_t *CallerPreservedMask,
76     const SmallVectorImpl<CCValAssign> &ArgLocs,
77     const SmallVectorImpl<SDValue> &OutVals) const {
78   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
79     const CCValAssign &ArgLoc = ArgLocs[I];
80     if (!ArgLoc.isRegLoc())
81       continue;
82     Register Reg = ArgLoc.getLocReg();
83     // Only look at callee saved registers.
84     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
85       continue;
86     // Check that we pass the value used for the caller.
87     // (We look for a CopyFromReg reading a virtual register that is used
88     //  for the function live-in value of register Reg)
89     SDValue Value = OutVals[I];
90     if (Value->getOpcode() != ISD::CopyFromReg)
91       return false;
92     unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
93     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
94       return false;
95   }
96   return true;
97 }
98 
99 /// Set CallLoweringInfo attribute flags based on a call instruction
100 /// and called function attributes.
101 void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
102                                                      unsigned ArgIdx) {
103   IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
104   IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
105   IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
106   IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
107   IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
108   IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
109   IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
110   IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
111   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
112   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
113   Alignment = Call->getParamAlignment(ArgIdx);
114   ByValType = nullptr;
115   if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
116     ByValType = Call->getParamByValType(ArgIdx);
117 }
118 
119 /// Generate a libcall taking the given operands as arguments and returning a
120 /// result of type RetVT.
121 std::pair<SDValue, SDValue>
122 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
123                             ArrayRef<SDValue> Ops,
124                             MakeLibCallOptions CallOptions,
125                             const SDLoc &dl,
126                             SDValue InChain) const {
127   if (!InChain)
128     InChain = DAG.getEntryNode();
129 
130   TargetLowering::ArgListTy Args;
131   Args.reserve(Ops.size());
132 
133   TargetLowering::ArgListEntry Entry;
134   for (unsigned i = 0; i < Ops.size(); ++i) {
135     SDValue NewOp = Ops[i];
136     Entry.Node = NewOp;
137     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
138     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
139                                                  CallOptions.IsSExt);
140     Entry.IsZExt = !Entry.IsSExt;
141 
142     if (CallOptions.IsSoften &&
143         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
144       Entry.IsSExt = Entry.IsZExt = false;
145     }
146     Args.push_back(Entry);
147   }
148 
149   if (LC == RTLIB::UNKNOWN_LIBCALL)
150     report_fatal_error("Unsupported library call operation!");
151   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
152                                          getPointerTy(DAG.getDataLayout()));
153 
154   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
155   TargetLowering::CallLoweringInfo CLI(DAG);
156   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
157   bool zeroExtend = !signExtend;
158 
159   if (CallOptions.IsSoften &&
160       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
161     signExtend = zeroExtend = false;
162   }
163 
164   CLI.setDebugLoc(dl)
165       .setChain(InChain)
166       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
167       .setNoReturn(CallOptions.DoesNotReturn)
168       .setDiscardResult(!CallOptions.IsReturnValueUsed)
169       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
170       .setSExtResult(signExtend)
171       .setZExtResult(zeroExtend);
172   return LowerCallTo(CLI);
173 }
174 
/// Choose a sequence of value types for lowering an inline memory operation
/// (memset/memcpy) of \p Size bytes, appending the chosen types to
/// \p MemOps. Returns false if more than \p Limit operations would be
/// required, or if the source/destination alignment precondition fails.
bool
TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                         unsigned Limit, uint64_t Size,
                                         unsigned DstAlign, unsigned SrcAlign,
                                         bool IsMemset,
                                         bool ZeroMemset,
                                         bool MemcpyStrSrc,
                                         bool AllowOverlap,
                                         unsigned DstAS, unsigned SrcAS,
                                         const AttributeList &FuncAttributes) const {
  // If 'SrcAlign' is zero, that means the memory operation does not need to
  // load the value, i.e. memset or memcpy from constant string. Otherwise,
  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
  // is the specified alignment of the memory operation. If it is zero, that
  // means it's possible to change the alignment of the destination.
  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
  // not need to be loaded.
  if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
    return false;

  // First ask the target for its preferred operation type; MVT::Other means
  // it has no preference and we pick a plain integer type below.
  EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
                               IsMemset, ZeroMemset, MemcpyStrSrc,
                               FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
           !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Emit operations of (possibly) decreasing width until Size is covered.
  unsigned NumMemOps = 0;
  while (Size != 0) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // The current type is wider than the remaining tail; shrink it.
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Walk down the simple integer types until a safe one is found;
        // i8 is the unconditional fallback.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
                                         MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
276 
277 /// Soften the operands of a comparison. This code is shared among BR_CC,
278 /// SELECT_CC, and SETCC handlers.
279 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
280                                          SDValue &NewLHS, SDValue &NewRHS,
281                                          ISD::CondCode &CCCode,
282                                          const SDLoc &dl, const SDValue OldLHS,
283                                          const SDValue OldRHS) const {
284   assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
285          && "Unsupported setcc type!");
286 
287   // Expand into one or more soft-fp libcall(s).
288   RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
289   bool ShouldInvertCC = false;
290   switch (CCCode) {
291   case ISD::SETEQ:
292   case ISD::SETOEQ:
293     LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
294           (VT == MVT::f64) ? RTLIB::OEQ_F64 :
295           (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
296     break;
297   case ISD::SETNE:
298   case ISD::SETUNE:
299     LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
300           (VT == MVT::f64) ? RTLIB::UNE_F64 :
301           (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
302     break;
303   case ISD::SETGE:
304   case ISD::SETOGE:
305     LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
306           (VT == MVT::f64) ? RTLIB::OGE_F64 :
307           (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
308     break;
309   case ISD::SETLT:
310   case ISD::SETOLT:
311     LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
312           (VT == MVT::f64) ? RTLIB::OLT_F64 :
313           (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
314     break;
315   case ISD::SETLE:
316   case ISD::SETOLE:
317     LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
318           (VT == MVT::f64) ? RTLIB::OLE_F64 :
319           (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
320     break;
321   case ISD::SETGT:
322   case ISD::SETOGT:
323     LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
324           (VT == MVT::f64) ? RTLIB::OGT_F64 :
325           (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
326     break;
327   case ISD::SETUO:
328     LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
329           (VT == MVT::f64) ? RTLIB::UO_F64 :
330           (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
331     break;
332   case ISD::SETO:
333     LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
334           (VT == MVT::f64) ? RTLIB::O_F64 :
335           (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
336     break;
337   case ISD::SETONE:
338     // SETONE = SETOLT | SETOGT
339     LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
340           (VT == MVT::f64) ? RTLIB::OLT_F64 :
341           (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
342     LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
343           (VT == MVT::f64) ? RTLIB::OGT_F64 :
344           (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
345     break;
346   case ISD::SETUEQ:
347     LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
348           (VT == MVT::f64) ? RTLIB::UO_F64 :
349           (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
350     LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
351           (VT == MVT::f64) ? RTLIB::OEQ_F64 :
352           (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
353     break;
354   default:
355     // Invert CC for unordered comparisons
356     ShouldInvertCC = true;
357     switch (CCCode) {
358     case ISD::SETULT:
359       LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
360             (VT == MVT::f64) ? RTLIB::OGE_F64 :
361             (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
362       break;
363     case ISD::SETULE:
364       LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
365             (VT == MVT::f64) ? RTLIB::OGT_F64 :
366             (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
367       break;
368     case ISD::SETUGT:
369       LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
370             (VT == MVT::f64) ? RTLIB::OLE_F64 :
371             (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
372       break;
373     case ISD::SETUGE:
374       LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
375             (VT == MVT::f64) ? RTLIB::OLT_F64 :
376             (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
377       break;
378     default: llvm_unreachable("Do not know how to soften this setcc!");
379     }
380   }
381 
382   // Use the target specific return value for comparions lib calls.
383   EVT RetVT = getCmpLibcallReturnType();
384   SDValue Ops[2] = {NewLHS, NewRHS};
385   TargetLowering::MakeLibCallOptions CallOptions;
386   EVT OpsVT[2] = { OldLHS.getValueType(),
387                    OldRHS.getValueType() };
388   CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
389   NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
390   NewRHS = DAG.getConstant(0, dl, RetVT);
391 
392   CCCode = getCmpLibcallCC(LC1);
393   if (ShouldInvertCC) {
394     assert(RetVT.isInteger());
395     CCCode = getSetCCInverse(CCCode, RetVT);
396   }
397 
398   if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
399     SDValue Tmp = DAG.getNode(
400         ISD::SETCC, dl,
401         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
402         NewLHS, NewRHS, DAG.getCondCode(CCCode));
403     NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
404     NewLHS = DAG.getNode(
405         ISD::SETCC, dl,
406         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
407         NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
408     NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
409     NewRHS = SDValue();
410   }
411 }
412 
413 /// Return the entry encoding for a jump table in the current function. The
414 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
415 unsigned TargetLowering::getJumpTableEncoding() const {
416   // In non-pic modes, just use the address of a block.
417   if (!isPositionIndependent())
418     return MachineJumpTableInfo::EK_BlockAddress;
419 
420   // In PIC mode, if the target supports a GPRel32 directive, use it.
421   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
422     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
423 
424   // Otherwise, use a label difference.
425   return MachineJumpTableInfo::EK_LabelDifference32;
426 }
427 
428 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
429                                                  SelectionDAG &DAG) const {
430   // If our PIC model is GP relative, use the global offset table as the base.
431   unsigned JTEncoding = getJumpTableEncoding();
432 
433   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
434       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
435     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
436 
437   return Table;
438 }
439 
440 /// This returns the relocation base for the given PIC jumptable, the same as
441 /// getPICJumpTableRelocBase, but as an MCExpr.
442 const MCExpr *
443 TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
444                                              unsigned JTI,MCContext &Ctx) const{
445   // The normal PIC reloc base is the label at the start of the jump table.
446   return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
447 }
448 
449 bool
450 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
451   const TargetMachine &TM = getTargetMachine();
452   const GlobalValue *GV = GA->getGlobal();
453 
454   // If the address is not even local to this DSO we will have to load it from
455   // a got and then add the offset.
456   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
457     return false;
458 
459   // If the code is position independent we will have to add a base register.
460   if (isPositionIndependent())
461     return false;
462 
463   // Otherwise we can do it.
464   return true;
465 }
466 
467 //===----------------------------------------------------------------------===//
468 //  Optimization Methods
469 //===----------------------------------------------------------------------===//
470 
471 /// If the specified instruction has a constant integer operand and there are
472 /// bits set in that constant that are not demanded, then clear those bits and
473 /// return true.
474 bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
475                                             TargetLoweringOpt &TLO) const {
476   SDLoc DL(Op);
477   unsigned Opcode = Op.getOpcode();
478 
479   // Do target-specific constant optimization.
480   if (targetShrinkDemandedConstant(Op, Demanded, TLO))
481     return TLO.New.getNode();
482 
483   // FIXME: ISD::SELECT, ISD::SELECT_CC
484   switch (Opcode) {
485   default:
486     break;
487   case ISD::XOR:
488   case ISD::AND:
489   case ISD::OR: {
490     auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
491     if (!Op1C)
492       return false;
493 
494     // If this is a 'not' op, don't touch it because that's a canonical form.
495     const APInt &C = Op1C->getAPIntValue();
496     if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
497       return false;
498 
499     if (!C.isSubsetOf(Demanded)) {
500       EVT VT = Op.getValueType();
501       SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
502       SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
503       return TLO.CombineTo(Op, NewOp);
504     }
505 
506     break;
507   }
508   }
509 
510   return false;
511 }
512 
513 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
514 /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
515 /// generalized for targets with other types of implicit widening casts.
516 bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
517                                       const APInt &Demanded,
518                                       TargetLoweringOpt &TLO) const {
519   assert(Op.getNumOperands() == 2 &&
520          "ShrinkDemandedOp only supports binary operators!");
521   assert(Op.getNode()->getNumValues() == 1 &&
522          "ShrinkDemandedOp only supports nodes with one result!");
523 
524   SelectionDAG &DAG = TLO.DAG;
525   SDLoc dl(Op);
526 
527   // Early return, as this function cannot handle vector types.
528   if (Op.getValueType().isVector())
529     return false;
530 
531   // Don't do this if the node has another user, which may require the
532   // full value.
533   if (!Op.getNode()->hasOneUse())
534     return false;
535 
536   // Search for the smallest integer type with free casts to and from
537   // Op's type. For expedience, just check power-of-2 integer types.
538   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
539   unsigned DemandedSize = Demanded.getActiveBits();
540   unsigned SmallVTBits = DemandedSize;
541   if (!isPowerOf2_32(SmallVTBits))
542     SmallVTBits = NextPowerOf2(SmallVTBits);
543   for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
544     EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
545     if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
546         TLI.isZExtFree(SmallVT, Op.getValueType())) {
547       // We found a type with free casts.
548       SDValue X = DAG.getNode(
549           Op.getOpcode(), dl, SmallVT,
550           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
551           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
552       assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
553       SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
554       return TLO.CombineTo(Op, Z);
555     }
556   }
557   return false;
558 }
559 
560 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
561                                           DAGCombinerInfo &DCI) const {
562   SelectionDAG &DAG = DCI.DAG;
563   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
564                         !DCI.isBeforeLegalizeOps());
565   KnownBits Known;
566 
567   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
568   if (Simplified) {
569     DCI.AddToWorklist(Op.getNode());
570     DCI.CommitTargetLoweringOpt(TLO);
571   }
572   return Simplified;
573 }
574 
575 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
576                                           KnownBits &Known,
577                                           TargetLoweringOpt &TLO,
578                                           unsigned Depth,
579                                           bool AssumeSingleUse) const {
580   EVT VT = Op.getValueType();
581   APInt DemandedElts = VT.isVector()
582                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
583                            : APInt(1, 1);
584   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
585                               AssumeSingleUse);
586 }
587 
// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
/// Try to return an existing value that already supplies all of the demanded
/// bits/elements of \p Op, looking through operations whose other inputs do
/// not affect the demanded parts. Returns an empty SDValue when no such
/// simplification is found.
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(Op.getValueType());

  unsigned NumElts = DemandedElts.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();

    // Same element width: the demanded masks carry over unchanged.
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    // Wide dst elements formed from several narrow src elements: accumulate
    // a per-src-element bit mask and mark each contributing src element.
    // TODO - bigendian once we have test coverage.
    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned Offset = i * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
        if (!Sub.isNullValue()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // Narrow dst elements carved from wider src elements: map each demanded
    // dst element onto the bit range of the src element that contains it.
    // TODO - bigendian once we have test coverage.
    if ((NumSrcEltBits % NumDstEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.  These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
      return Op.getOperand(0);
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      // Undef or un-demanded lanes don't constrain either identity.
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // Let targets simplify their own (BUILTIN_OP_END-relative) nodes.
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
751 
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, recording the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller).  The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
759 bool TargetLowering::SimplifyDemandedBits(
760     SDValue Op, const APInt &OriginalDemandedBits,
761     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
762     unsigned Depth, bool AssumeSingleUse) const {
763   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
764   assert(Op.getScalarValueSizeInBits() == BitWidth &&
765          "Mask size mismatches value type size!");
766 
767   unsigned NumElts = OriginalDemandedElts.getBitWidth();
768   assert((!Op.getValueType().isVector() ||
769           NumElts == Op.getValueType().getVectorNumElements()) &&
770          "Unexpected vector size");
771 
772   APInt DemandedBits = OriginalDemandedBits;
773   APInt DemandedElts = OriginalDemandedElts;
774   SDLoc dl(Op);
775   auto &DL = TLO.DAG.getDataLayout();
776 
777   // Don't know anything.
778   Known = KnownBits(BitWidth);
779 
780   // Undef operand.
781   if (Op.isUndef())
782     return false;
783 
784   if (Op.getOpcode() == ISD::Constant) {
785     // We know all of the bits for a constant!
786     Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
787     Known.Zero = ~Known.One;
788     return false;
789   }
790 
791   // Other users may use these bits.
792   EVT VT = Op.getValueType();
793   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
794     if (Depth != 0) {
      // If not at the root, just compute the Known bits to
      // simplify things downstream.
797       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
798       return false;
799     }
800     // If this is the root being simplified, allow it to have multiple uses,
801     // just set the DemandedBits/Elts to all bits.
802     DemandedBits = APInt::getAllOnesValue(BitWidth);
803     DemandedElts = APInt::getAllOnesValue(NumElts);
804   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
805     // Not demanding any bits/elts from Op.
806     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
807   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
808     // Limit search depth.
809     return false;
810   }
811 
812   KnownBits Known2, KnownOut;
813   switch (Op.getOpcode()) {
814   case ISD::TargetConstant:
815     llvm_unreachable("Can't simplify this node");
816   case ISD::SCALAR_TO_VECTOR: {
817     if (!DemandedElts[0])
818       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
819 
820     KnownBits SrcKnown;
821     SDValue Src = Op.getOperand(0);
822     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
823     APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
824     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
825       return true;
826     Known = SrcKnown.zextOrTrunc(BitWidth, false);
827     break;
828   }
829   case ISD::BUILD_VECTOR:
830     // Collect the known bits that are shared by every demanded element.
831     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
832     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
833     return false; // Don't fall through, will infinitely loop.
834   case ISD::LOAD: {
835     LoadSDNode *LD = cast<LoadSDNode>(Op);
836     if (getTargetConstantFromLoad(LD)) {
837       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
838       return false; // Don't fall through, will infinitely loop.
839     }
840     break;
841   }
842   case ISD::INSERT_VECTOR_ELT: {
843     SDValue Vec = Op.getOperand(0);
844     SDValue Scl = Op.getOperand(1);
845     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
846     EVT VecVT = Vec.getValueType();
847 
848     // If index isn't constant, assume we need all vector elements AND the
849     // inserted element.
850     APInt DemandedVecElts(DemandedElts);
851     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
852       unsigned Idx = CIdx->getZExtValue();
853       DemandedVecElts.clearBit(Idx);
854 
855       // Inserted element is not required.
856       if (!DemandedElts[Idx])
857         return TLO.CombineTo(Op, Vec);
858     }
859 
860     KnownBits KnownScl;
861     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
862     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
863     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
864       return true;
865 
866     Known = KnownScl.zextOrTrunc(BitWidth, false);
867 
868     KnownBits KnownVec;
869     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
870                              Depth + 1))
871       return true;
872 
873     if (!!DemandedVecElts) {
874       Known.One &= KnownVec.One;
875       Known.Zero &= KnownVec.Zero;
876     }
877 
878     return false;
879   }
880   case ISD::INSERT_SUBVECTOR: {
881     SDValue Base = Op.getOperand(0);
882     SDValue Sub = Op.getOperand(1);
883     EVT SubVT = Sub.getValueType();
884     unsigned NumSubElts = SubVT.getVectorNumElements();
885 
886     // If index isn't constant, assume we need the original demanded base
887     // elements and ALL the inserted subvector elements.
888     APInt BaseElts = DemandedElts;
889     APInt SubElts = APInt::getAllOnesValue(NumSubElts);
890     if (isa<ConstantSDNode>(Op.getOperand(2))) {
891       const APInt &Idx = Op.getConstantOperandAPInt(2);
892       if (Idx.ule(NumElts - NumSubElts)) {
893         unsigned SubIdx = Idx.getZExtValue();
894         SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
895         BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
896       }
897     }
898 
899     KnownBits KnownSub, KnownBase;
900     if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
901                              Depth + 1))
902       return true;
903     if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
904                              Depth + 1))
905       return true;
906 
907     Known.Zero.setAllBits();
908     Known.One.setAllBits();
909     if (!!SubElts) {
910         Known.One &= KnownSub.One;
911         Known.Zero &= KnownSub.Zero;
912     }
913     if (!!BaseElts) {
914         Known.One &= KnownBase.One;
915         Known.Zero &= KnownBase.Zero;
916     }
917     break;
918   }
919   case ISD::EXTRACT_SUBVECTOR: {
920     // If index isn't constant, assume we need all the source vector elements.
921     SDValue Src = Op.getOperand(0);
922     ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
923     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
924     APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
925     if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
926       // Offset the demanded elts by the subvector index.
927       uint64_t Idx = SubIdx->getZExtValue();
928       SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
929     }
930     if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
931       return true;
932     break;
933   }
934   case ISD::CONCAT_VECTORS: {
935     Known.Zero.setAllBits();
936     Known.One.setAllBits();
937     EVT SubVT = Op.getOperand(0).getValueType();
938     unsigned NumSubVecs = Op.getNumOperands();
939     unsigned NumSubElts = SubVT.getVectorNumElements();
940     for (unsigned i = 0; i != NumSubVecs; ++i) {
941       APInt DemandedSubElts =
942           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
943       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
944                                Known2, TLO, Depth + 1))
945         return true;
946       // Known bits are shared by every demanded subvector element.
947       if (!!DemandedSubElts) {
948         Known.One &= Known2.One;
949         Known.Zero &= Known2.Zero;
950       }
951     }
952     break;
953   }
954   case ISD::VECTOR_SHUFFLE: {
955     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
956 
    // Collect demanded elements from the shuffle operands.
958     APInt DemandedLHS(NumElts, 0);
959     APInt DemandedRHS(NumElts, 0);
960     for (unsigned i = 0; i != NumElts; ++i) {
961       if (!DemandedElts[i])
962         continue;
963       int M = ShuffleMask[i];
964       if (M < 0) {
965         // For UNDEF elements, we don't know anything about the common state of
966         // the shuffle result.
967         DemandedLHS.clearAllBits();
968         DemandedRHS.clearAllBits();
969         break;
970       }
971       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
972       if (M < (int)NumElts)
973         DemandedLHS.setBit(M);
974       else
975         DemandedRHS.setBit(M - NumElts);
976     }
977 
978     if (!!DemandedLHS || !!DemandedRHS) {
979       SDValue Op0 = Op.getOperand(0);
980       SDValue Op1 = Op.getOperand(1);
981 
982       Known.Zero.setAllBits();
983       Known.One.setAllBits();
984       if (!!DemandedLHS) {
985         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
986                                  Depth + 1))
987           return true;
988         Known.One &= Known2.One;
989         Known.Zero &= Known2.Zero;
990       }
991       if (!!DemandedRHS) {
992         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
993                                  Depth + 1))
994           return true;
995         Known.One &= Known2.One;
996         Known.Zero &= Known2.Zero;
997       }
998 
999       // Attempt to avoid multi-use ops if we don't need anything from them.
1000       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1001           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1002       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1003           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1004       if (DemandedOp0 || DemandedOp1) {
1005         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1006         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1007         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1008         return TLO.CombineTo(Op, NewOp);
1009       }
1010     }
1011     break;
1012   }
1013   case ISD::AND: {
1014     SDValue Op0 = Op.getOperand(0);
1015     SDValue Op1 = Op.getOperand(1);
1016 
1017     // If the RHS is a constant, check to see if the LHS would be zero without
1018     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1019     // simplify the LHS, here we're using information from the LHS to simplify
1020     // the RHS.
1021     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1022       // Do not increment Depth here; that can cause an infinite loop.
1023       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1024       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1025       if ((LHSKnown.Zero & DemandedBits) ==
1026           (~RHSC->getAPIntValue() & DemandedBits))
1027         return TLO.CombineTo(Op, Op0);
1028 
1029       // If any of the set bits in the RHS are known zero on the LHS, shrink
1030       // the constant.
1031       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
1032         return true;
1033 
1034       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1035       // constant, but if this 'and' is only clearing bits that were just set by
1036       // the xor, then this 'and' can be eliminated by shrinking the mask of
1037       // the xor. For example, for a 32-bit X:
1038       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1039       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1040           LHSKnown.One == ~RHSC->getAPIntValue()) {
1041         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1042         return TLO.CombineTo(Op, Xor);
1043       }
1044     }
1045 
1046     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1047                              Depth + 1))
1048       return true;
1049     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1050     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1051                              Known2, TLO, Depth + 1))
1052       return true;
1053     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1054 
1055     // Attempt to avoid multi-use ops if we don't need anything from them.
1056     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1057       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1058           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1059       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1060           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1061       if (DemandedOp0 || DemandedOp1) {
1062         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1063         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1064         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1065         return TLO.CombineTo(Op, NewOp);
1066       }
1067     }
1068 
1069     // If all of the demanded bits are known one on one side, return the other.
1070     // These bits cannot contribute to the result of the 'and'.
1071     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1072       return TLO.CombineTo(Op, Op0);
1073     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1074       return TLO.CombineTo(Op, Op1);
1075     // If all of the demanded bits in the inputs are known zeros, return zero.
1076     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1077       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1078     // If the RHS is a constant, see if we can simplify it.
1079     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
1080       return true;
1081     // If the operation can be done in a smaller type, do so.
1082     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1083       return true;
1084 
1085     // Output known-1 bits are only known if set in both the LHS & RHS.
1086     Known.One &= Known2.One;
1087     // Output known-0 are known to be clear if zero in either the LHS | RHS.
1088     Known.Zero |= Known2.Zero;
1089     break;
1090   }
1091   case ISD::OR: {
1092     SDValue Op0 = Op.getOperand(0);
1093     SDValue Op1 = Op.getOperand(1);
1094 
1095     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1096                              Depth + 1))
1097       return true;
1098     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1099     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1100                              Known2, TLO, Depth + 1))
1101       return true;
1102     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1103 
1104     // Attempt to avoid multi-use ops if we don't need anything from them.
1105     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1106       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1107           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1108       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1109           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1110       if (DemandedOp0 || DemandedOp1) {
1111         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1112         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1113         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1114         return TLO.CombineTo(Op, NewOp);
1115       }
1116     }
1117 
1118     // If all of the demanded bits are known zero on one side, return the other.
1119     // These bits cannot contribute to the result of the 'or'.
1120     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1121       return TLO.CombineTo(Op, Op0);
1122     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1123       return TLO.CombineTo(Op, Op1);
1124     // If the RHS is a constant, see if we can simplify it.
1125     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1126       return true;
1127     // If the operation can be done in a smaller type, do so.
1128     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1129       return true;
1130 
1131     // Output known-0 bits are only known if clear in both the LHS & RHS.
1132     Known.Zero &= Known2.Zero;
1133     // Output known-1 are known to be set if set in either the LHS | RHS.
1134     Known.One |= Known2.One;
1135     break;
1136   }
1137   case ISD::XOR: {
1138     SDValue Op0 = Op.getOperand(0);
1139     SDValue Op1 = Op.getOperand(1);
1140 
1141     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1142                              Depth + 1))
1143       return true;
1144     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1145     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1146                              Depth + 1))
1147       return true;
1148     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1149 
1150     // Attempt to avoid multi-use ops if we don't need anything from them.
1151     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1152       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1153           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1154       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1155           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1156       if (DemandedOp0 || DemandedOp1) {
1157         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1158         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1159         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1160         return TLO.CombineTo(Op, NewOp);
1161       }
1162     }
1163 
1164     // If all of the demanded bits are known zero on one side, return the other.
1165     // These bits cannot contribute to the result of the 'xor'.
1166     if (DemandedBits.isSubsetOf(Known.Zero))
1167       return TLO.CombineTo(Op, Op0);
1168     if (DemandedBits.isSubsetOf(Known2.Zero))
1169       return TLO.CombineTo(Op, Op1);
1170     // If the operation can be done in a smaller type, do so.
1171     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1172       return true;
1173 
1174     // If all of the unknown bits are known to be zero on one side or the other
1175     // (but not both) turn this into an *inclusive* or.
1176     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1177     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1178       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1179 
1180     // Output known-0 bits are known if clear or set in both the LHS & RHS.
1181     KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
1182     // Output known-1 are known to be set if set in only one of the LHS, RHS.
1183     KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
1184 
1185     if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
1186       // If one side is a constant, and all of the known set bits on the other
1187       // side are also set in the constant, turn this into an AND, as we know
1188       // the bits will be cleared.
1189       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1190       // NB: it is okay if more bits are known than are requested
1191       if (C->getAPIntValue() == Known2.One) {
1192         SDValue ANDC =
1193             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1194         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1195       }
1196 
1197       // If the RHS is a constant, see if we can change it. Don't alter a -1
1198       // constant because that's a 'not' op, and that is better for combining
1199       // and codegen.
1200       if (!C->isAllOnesValue()) {
1201         if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
1202           // We're flipping all demanded bits. Flip the undemanded bits too.
1203           SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1204           return TLO.CombineTo(Op, New);
1205         }
1206         // If we can't turn this into a 'not', try to shrink the constant.
1207         if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1208           return true;
1209       }
1210     }
1211 
1212     Known = std::move(KnownOut);
1213     break;
1214   }
1215   case ISD::SELECT:
1216     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1217                              Depth + 1))
1218       return true;
1219     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1220                              Depth + 1))
1221       return true;
1222     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1223     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1224 
1225     // If the operands are constants, see if we can simplify them.
1226     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1227       return true;
1228 
1229     // Only known if known in both the LHS and RHS.
1230     Known.One &= Known2.One;
1231     Known.Zero &= Known2.Zero;
1232     break;
1233   case ISD::SELECT_CC:
1234     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1235                              Depth + 1))
1236       return true;
1237     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1238                              Depth + 1))
1239       return true;
1240     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1241     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1242 
1243     // If the operands are constants, see if we can simplify them.
1244     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1245       return true;
1246 
1247     // Only known if known in both the LHS and RHS.
1248     Known.One &= Known2.One;
1249     Known.Zero &= Known2.Zero;
1250     break;
1251   case ISD::SETCC: {
1252     SDValue Op0 = Op.getOperand(0);
1253     SDValue Op1 = Op.getOperand(1);
1254     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1255     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1256     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1257     // -1, we may be able to bypass the setcc.
1258     if (DemandedBits.isSignMask() &&
1259         Op0.getScalarValueSizeInBits() == BitWidth &&
1260         getBooleanContents(VT) ==
1261             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1262       // If we're testing X < 0, then this compare isn't needed - just use X!
1263       // FIXME: We're limiting to integer types here, but this should also work
1264       // if we don't care about FP signed-zero. The use of SETLT with FP means
1265       // that we don't care about NaNs.
1266       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1267           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1268         return TLO.CombineTo(Op, Op0);
1269 
1270       // TODO: Should we check for other forms of sign-bit comparisons?
1271       // Examples: X <= -1, X >= 0
1272     }
1273     if (getBooleanContents(Op0.getValueType()) ==
1274             TargetLowering::ZeroOrOneBooleanContent &&
1275         BitWidth > 1)
1276       Known.Zero.setBitsFrom(1);
1277     break;
1278   }
1279   case ISD::SHL: {
1280     SDValue Op0 = Op.getOperand(0);
1281     SDValue Op1 = Op.getOperand(1);
1282 
1283     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1284       // If the shift count is an invalid immediate, don't do anything.
1285       if (SA->getAPIntValue().uge(BitWidth))
1286         break;
1287 
1288       unsigned ShAmt = SA->getZExtValue();
1289       if (ShAmt == 0)
1290         return TLO.CombineTo(Op, Op0);
1291 
1292       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1293       // single shift.  We can do this if the bottom bits (which are shifted
1294       // out) are never demanded.
1295       // TODO - support non-uniform vector amounts.
1296       if (Op0.getOpcode() == ISD::SRL) {
1297         if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1298           if (ConstantSDNode *SA2 =
1299                   isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1300             if (SA2->getAPIntValue().ult(BitWidth)) {
1301               unsigned C1 = SA2->getZExtValue();
1302               unsigned Opc = ISD::SHL;
1303               int Diff = ShAmt - C1;
1304               if (Diff < 0) {
1305                 Diff = -Diff;
1306                 Opc = ISD::SRL;
1307               }
1308 
1309               SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
1310               return TLO.CombineTo(
1311                   Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1312             }
1313           }
1314         }
1315       }
1316 
1317       if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
1318                                Known, TLO, Depth + 1))
1319         return true;
1320 
1321       // Try shrinking the operation as long as the shift amount will still be
1322       // in range.
1323       if ((ShAmt < DemandedBits.getActiveBits()) &&
1324           ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1325         return true;
1326 
1327       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1328       // are not demanded. This will likely allow the anyext to be folded away.
1329       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1330         SDValue InnerOp = Op0.getOperand(0);
1331         EVT InnerVT = InnerOp.getValueType();
1332         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1333         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1334             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1335           EVT ShTy = getShiftAmountTy(InnerVT, DL);
1336           if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1337             ShTy = InnerVT;
1338           SDValue NarrowShl =
1339               TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1340                               TLO.DAG.getConstant(ShAmt, dl, ShTy));
1341           return TLO.CombineTo(
1342               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1343         }
1344         // Repeat the SHL optimization above in cases where an extension
1345         // intervenes: (shl (anyext (shr x, c1)), c2) to
1346         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1347         // aren't demanded (as above) and that the shifted upper c1 bits of
1348         // x aren't demanded.
1349         if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
1350             InnerOp.hasOneUse()) {
1351           if (ConstantSDNode *SA2 =
1352                   isConstOrConstSplat(InnerOp.getOperand(1))) {
1353             unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
1354             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1355                 DemandedBits.getActiveBits() <=
1356                     (InnerBits - InnerShAmt + ShAmt) &&
1357                 DemandedBits.countTrailingZeros() >= ShAmt) {
1358               SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
1359                                                   Op1.getValueType());
1360               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1361                                                InnerOp.getOperand(0));
1362               return TLO.CombineTo(
1363                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1364             }
1365           }
1366         }
1367       }
1368 
1369       Known.Zero <<= ShAmt;
1370       Known.One <<= ShAmt;
1371       // low bits known zero.
1372       Known.Zero.setLowBits(ShAmt);
1373     }
1374     break;
1375   }
1376   case ISD::SRL: {
1377     SDValue Op0 = Op.getOperand(0);
1378     SDValue Op1 = Op.getOperand(1);
1379 
1380     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1381       // If the shift count is an invalid immediate, don't do anything.
1382       if (SA->getAPIntValue().uge(BitWidth))
1383         break;
1384 
1385       unsigned ShAmt = SA->getZExtValue();
1386       if (ShAmt == 0)
1387         return TLO.CombineTo(Op, Op0);
1388 
1389       EVT ShiftVT = Op1.getValueType();
1390       APInt InDemandedMask = (DemandedBits << ShAmt);
1391 
1392       // If the shift is exact, then it does demand the low bits (and knows that
1393       // they are zero).
1394       if (Op->getFlags().hasExact())
1395         InDemandedMask.setLowBits(ShAmt);
1396 
1397       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1398       // single shift.  We can do this if the top bits (which are shifted out)
1399       // are never demanded.
1400       // TODO - support non-uniform vector amounts.
1401       if (Op0.getOpcode() == ISD::SHL) {
1402         if (ConstantSDNode *SA2 =
1403                 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1404           if (!DemandedBits.intersects(
1405                   APInt::getHighBitsSet(BitWidth, ShAmt))) {
1406             if (SA2->getAPIntValue().ult(BitWidth)) {
1407               unsigned C1 = SA2->getZExtValue();
1408               unsigned Opc = ISD::SRL;
1409               int Diff = ShAmt - C1;
1410               if (Diff < 0) {
1411                 Diff = -Diff;
1412                 Opc = ISD::SHL;
1413               }
1414 
1415               SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1416               return TLO.CombineTo(
1417                   Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1418             }
1419           }
1420         }
1421       }
1422 
1423       // Compute the new bits that are at the top now.
1424       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1425                                Depth + 1))
1426         return true;
1427       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1428       Known.Zero.lshrInPlace(ShAmt);
1429       Known.One.lshrInPlace(ShAmt);
1430 
1431       Known.Zero.setHighBits(ShAmt); // High bits known zero.
1432     }
1433     break;
1434   }
1435   case ISD::SRA: {
1436     SDValue Op0 = Op.getOperand(0);
1437     SDValue Op1 = Op.getOperand(1);
1438 
1439     // If this is an arithmetic shift right and only the low-bit is set, we can
1440     // always convert this into a logical shr, even if the shift amount is
1441     // variable.  The low bit of the shift cannot be an input sign bit unless
1442     // the shift amount is >= the size of the datatype, which is undefined.
1443     if (DemandedBits.isOneValue())
1444       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1445 
1446     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1447       // If the shift count is an invalid immediate, don't do anything.
1448       if (SA->getAPIntValue().uge(BitWidth))
1449         break;
1450 
1451       unsigned ShAmt = SA->getZExtValue();
1452       if (ShAmt == 0)
1453         return TLO.CombineTo(Op, Op0);
1454 
1455       APInt InDemandedMask = (DemandedBits << ShAmt);
1456 
1457       // If the shift is exact, then it does demand the low bits (and knows that
1458       // they are zero).
1459       if (Op->getFlags().hasExact())
1460         InDemandedMask.setLowBits(ShAmt);
1461 
1462       // If any of the demanded bits are produced by the sign extension, we also
1463       // demand the input sign bit.
1464       if (DemandedBits.countLeadingZeros() < ShAmt)
1465         InDemandedMask.setSignBit();
1466 
1467       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1468                                Depth + 1))
1469         return true;
1470       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1471       Known.Zero.lshrInPlace(ShAmt);
1472       Known.One.lshrInPlace(ShAmt);
1473 
1474       // If the input sign bit is known to be zero, or if none of the top bits
1475       // are demanded, turn this into an unsigned shift right.
1476       if (Known.Zero[BitWidth - ShAmt - 1] ||
1477           DemandedBits.countLeadingZeros() >= ShAmt) {
1478         SDNodeFlags Flags;
1479         Flags.setExact(Op->getFlags().hasExact());
1480         return TLO.CombineTo(
1481             Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1482       }
1483 
1484       int Log2 = DemandedBits.exactLogBase2();
1485       if (Log2 >= 0) {
1486         // The bit must come from the sign.
1487         SDValue NewSA =
1488             TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
1489         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1490       }
1491 
1492       if (Known.One[BitWidth - ShAmt - 1])
1493         // New bits are known one.
1494         Known.One.setHighBits(ShAmt);
1495     }
1496     break;
1497   }
1498   case ISD::FSHL:
1499   case ISD::FSHR: {
1500     SDValue Op0 = Op.getOperand(0);
1501     SDValue Op1 = Op.getOperand(1);
1502     SDValue Op2 = Op.getOperand(2);
1503     bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1504 
1505     if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1506       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1507 
1508       // For fshl, 0-shift returns the 1st arg.
1509       // For fshr, 0-shift returns the 2nd arg.
1510       if (Amt == 0) {
1511         if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1512                                  Known, TLO, Depth + 1))
1513           return true;
1514         break;
1515       }
1516 
1517       // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1518       // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
1519       APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1520       APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1521       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1522                                Depth + 1))
1523         return true;
1524       if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1525                                Depth + 1))
1526         return true;
1527 
1528       Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
1529       Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
1530       Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1531       Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1532       Known.One |= Known2.One;
1533       Known.Zero |= Known2.Zero;
1534     }
1535     break;
1536   }
1537   case ISD::BITREVERSE: {
1538     SDValue Src = Op.getOperand(0);
1539     APInt DemandedSrcBits = DemandedBits.reverseBits();
1540     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1541                              Depth + 1))
1542       return true;
1543     Known.One = Known2.One.reverseBits();
1544     Known.Zero = Known2.Zero.reverseBits();
1545     break;
1546   }
1547   case ISD::BSWAP: {
1548     SDValue Src = Op.getOperand(0);
1549     APInt DemandedSrcBits = DemandedBits.byteSwap();
1550     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1551                              Depth + 1))
1552       return true;
1553     Known.One = Known2.One.byteSwap();
1554     Known.Zero = Known2.Zero.byteSwap();
1555     break;
1556   }
1557   case ISD::SIGN_EXTEND_INREG: {
1558     SDValue Op0 = Op.getOperand(0);
1559     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1560     unsigned ExVTBits = ExVT.getScalarSizeInBits();
1561 
1562     // If we only care about the highest bit, don't bother shifting right.
1563     if (DemandedBits.isSignMask()) {
1564       unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
1565       bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
1566       // However if the input is already sign extended we expect the sign
1567       // extension to be dropped altogether later and do not simplify.
1568       if (!AlreadySignExtended) {
1569         // Compute the correct shift amount type, which must be getShiftAmountTy
1570         // for scalar types after legalization.
1571         EVT ShiftAmtTy = VT;
1572         if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
1573           ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
1574 
1575         SDValue ShiftAmt =
1576             TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
1577         return TLO.CombineTo(Op,
1578                              TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
1579       }
1580     }
1581 
1582     // If none of the extended bits are demanded, eliminate the sextinreg.
1583     if (DemandedBits.getActiveBits() <= ExVTBits)
1584       return TLO.CombineTo(Op, Op0);
1585 
1586     APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
1587 
1588     // Since the sign extended bits are demanded, we know that the sign
1589     // bit is demanded.
1590     InputDemandedBits.setBit(ExVTBits - 1);
1591 
1592     if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
1593       return true;
1594     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1595 
1596     // If the sign bit of the input is known set or clear, then we know the
1597     // top bits of the result.
1598 
1599     // If the input sign bit is known zero, convert this into a zero extension.
1600     if (Known.Zero[ExVTBits - 1])
1601       return TLO.CombineTo(
1602           Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
1603 
1604     APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
1605     if (Known.One[ExVTBits - 1]) { // Input sign bit known set
1606       Known.One.setBitsFrom(ExVTBits);
1607       Known.Zero &= Mask;
1608     } else { // Input sign bit unknown
1609       Known.Zero &= Mask;
1610       Known.One &= Mask;
1611     }
1612     break;
1613   }
1614   case ISD::BUILD_PAIR: {
1615     EVT HalfVT = Op.getOperand(0).getValueType();
1616     unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
1617 
1618     APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
1619     APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
1620 
1621     KnownBits KnownLo, KnownHi;
1622 
1623     if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
1624       return true;
1625 
1626     if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
1627       return true;
1628 
1629     Known.Zero = KnownLo.Zero.zext(BitWidth) |
1630                  KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
1631 
1632     Known.One = KnownLo.One.zext(BitWidth) |
1633                 KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
1634     break;
1635   }
1636   case ISD::ZERO_EXTEND:
1637   case ISD::ZERO_EXTEND_VECTOR_INREG: {
1638     SDValue Src = Op.getOperand(0);
1639     EVT SrcVT = Src.getValueType();
1640     unsigned InBits = SrcVT.getScalarSizeInBits();
1641     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1642     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
1643 
1644     // If none of the top bits are demanded, convert this into an any_extend.
1645     if (DemandedBits.getActiveBits() <= InBits) {
1646       // If we only need the non-extended bits of the bottom element
1647       // then we can just bitcast to the result.
1648       if (IsVecInReg && DemandedElts == 1 &&
1649           VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1650           TLO.DAG.getDataLayout().isLittleEndian())
1651         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1652 
1653       unsigned Opc =
1654           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1655       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1656         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1657     }
1658 
1659     APInt InDemandedBits = DemandedBits.trunc(InBits);
1660     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1661     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1662                              Depth + 1))
1663       return true;
1664     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1665     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1666     Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
1667     break;
1668   }
1669   case ISD::SIGN_EXTEND:
1670   case ISD::SIGN_EXTEND_VECTOR_INREG: {
1671     SDValue Src = Op.getOperand(0);
1672     EVT SrcVT = Src.getValueType();
1673     unsigned InBits = SrcVT.getScalarSizeInBits();
1674     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1675     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
1676 
1677     // If none of the top bits are demanded, convert this into an any_extend.
1678     if (DemandedBits.getActiveBits() <= InBits) {
1679       // If we only need the non-extended bits of the bottom element
1680       // then we can just bitcast to the result.
1681       if (IsVecInReg && DemandedElts == 1 &&
1682           VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1683           TLO.DAG.getDataLayout().isLittleEndian())
1684         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1685 
1686       unsigned Opc =
1687           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1688       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1689         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1690     }
1691 
1692     APInt InDemandedBits = DemandedBits.trunc(InBits);
1693     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1694 
1695     // Since some of the sign extended bits are demanded, we know that the sign
1696     // bit is demanded.
1697     InDemandedBits.setBit(InBits - 1);
1698 
1699     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1700                              Depth + 1))
1701       return true;
1702     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1703     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1704 
1705     // If the sign bit is known one, the top bits match.
1706     Known = Known.sext(BitWidth);
1707 
1708     // If the sign bit is known zero, convert this to a zero extend.
1709     if (Known.isNonNegative()) {
1710       unsigned Opc =
1711           IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
1712       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1713         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1714     }
1715     break;
1716   }
1717   case ISD::ANY_EXTEND:
1718   case ISD::ANY_EXTEND_VECTOR_INREG: {
1719     SDValue Src = Op.getOperand(0);
1720     EVT SrcVT = Src.getValueType();
1721     unsigned InBits = SrcVT.getScalarSizeInBits();
1722     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1723     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
1724 
1725     // If we only need the bottom element then we can just bitcast.
1726     // TODO: Handle ANY_EXTEND?
1727     if (IsVecInReg && DemandedElts == 1 &&
1728         VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1729         TLO.DAG.getDataLayout().isLittleEndian())
1730       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1731 
1732     APInt InDemandedBits = DemandedBits.trunc(InBits);
1733     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1734     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1735                              Depth + 1))
1736       return true;
1737     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1738     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1739     Known = Known.zext(BitWidth, false /* => any extend */);
1740     break;
1741   }
1742   case ISD::TRUNCATE: {
1743     SDValue Src = Op.getOperand(0);
1744 
1745     // Simplify the input, using demanded bit information, and compute the known
1746     // zero/one bits live out.
1747     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
1748     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
1749     if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
1750       return true;
1751     Known = Known.trunc(BitWidth);
1752 
1753     // Attempt to avoid multi-use ops if we don't need anything from them.
1754     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1755             Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
1756       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
1757 
1758     // If the input is only used by this truncate, see if we can shrink it based
1759     // on the known demanded bits.
1760     if (Src.getNode()->hasOneUse()) {
1761       switch (Src.getOpcode()) {
1762       default:
1763         break;
1764       case ISD::SRL:
1765         // Shrink SRL by a constant if none of the high bits shifted in are
1766         // demanded.
1767         if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
1768           // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
1769           // undesirable.
1770           break;
1771 
1772         SDValue ShAmt = Src.getOperand(1);
1773         auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
1774         if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
1775           break;
1776         uint64_t ShVal = ShAmtC->getZExtValue();
1777 
1778         APInt HighBits =
1779             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
1780         HighBits.lshrInPlace(ShVal);
1781         HighBits = HighBits.trunc(BitWidth);
1782 
1783         if (!(HighBits & DemandedBits)) {
1784           // None of the shifted in bits are needed.  Add a truncate of the
1785           // shift input, then shift it.
1786           if (TLO.LegalTypes())
1787             ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
1788           SDValue NewTrunc =
1789               TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
1790           return TLO.CombineTo(
1791               Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt));
1792         }
1793         break;
1794       }
1795     }
1796 
1797     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1798     break;
1799   }
1800   case ISD::AssertZext: {
1801     // AssertZext demands all of the high bits, plus any of the low bits
1802     // demanded by its users.
1803     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1804     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
1805     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
1806                              TLO, Depth + 1))
1807       return true;
1808     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1809 
1810     Known.Zero |= ~InMask;
1811     break;
1812   }
1813   case ISD::EXTRACT_VECTOR_ELT: {
1814     SDValue Src = Op.getOperand(0);
1815     SDValue Idx = Op.getOperand(1);
1816     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1817     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
1818 
1819     // Demand the bits from every vector element without a constant index.
1820     APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
1821     if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
1822       if (CIdx->getAPIntValue().ult(NumSrcElts))
1823         DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
1824 
1825     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
1826     // anything about the extended bits.
1827     APInt DemandedSrcBits = DemandedBits;
1828     if (BitWidth > EltBitWidth)
1829       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
1830 
1831     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
1832                              Depth + 1))
1833       return true;
1834 
1835     Known = Known2;
1836     if (BitWidth > EltBitWidth)
1837       Known = Known.zext(BitWidth, false /* => any extend */);
1838     break;
1839   }
1840   case ISD::BITCAST: {
1841     SDValue Src = Op.getOperand(0);
1842     EVT SrcVT = Src.getValueType();
1843     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
1844 
1845     // If this is an FP->Int bitcast and if the sign bit is the only
1846     // thing demanded, turn this into a FGETSIGN.
1847     if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
1848         DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
1849         SrcVT.isFloatingPoint()) {
1850       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
1851       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
1852       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
1853           SrcVT != MVT::f128) {
1854         // Cannot eliminate/lower SHL for f128 yet.
1855         EVT Ty = OpVTLegal ? VT : MVT::i32;
1856         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
1857         // place.  We expect the SHL to be eliminated by other optimizations.
1858         SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
1859         unsigned OpVTSizeInBits = Op.getValueSizeInBits();
1860         if (!OpVTLegal && OpVTSizeInBits > 32)
1861           Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
1862         unsigned ShVal = Op.getValueSizeInBits() - 1;
1863         SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
1864         return TLO.CombineTo(Op,
1865                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
1866       }
1867     }
1868 
1869     // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
1870     // Demand the elt/bit if any of the original elts/bits are demanded.
1871     // TODO - bigendian once we have test coverage.
1872     if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
1873         TLO.DAG.getDataLayout().isLittleEndian()) {
1874       unsigned Scale = BitWidth / NumSrcEltBits;
1875       unsigned NumSrcElts = SrcVT.getVectorNumElements();
1876       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1877       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
1878       for (unsigned i = 0; i != Scale; ++i) {
1879         unsigned Offset = i * NumSrcEltBits;
1880         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
1881         if (!Sub.isNullValue()) {
1882           DemandedSrcBits |= Sub;
1883           for (unsigned j = 0; j != NumElts; ++j)
1884             if (DemandedElts[j])
1885               DemandedSrcElts.setBit((j * Scale) + i);
1886         }
1887       }
1888 
1889       APInt KnownSrcUndef, KnownSrcZero;
1890       if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1891                                      KnownSrcZero, TLO, Depth + 1))
1892         return true;
1893 
1894       KnownBits KnownSrcBits;
1895       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1896                                KnownSrcBits, TLO, Depth + 1))
1897         return true;
1898     } else if ((NumSrcEltBits % BitWidth) == 0 &&
1899                TLO.DAG.getDataLayout().isLittleEndian()) {
1900       unsigned Scale = NumSrcEltBits / BitWidth;
1901       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1902       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1903       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
1904       for (unsigned i = 0; i != NumElts; ++i)
1905         if (DemandedElts[i]) {
1906           unsigned Offset = (i % Scale) * BitWidth;
1907           DemandedSrcBits.insertBits(DemandedBits, Offset);
1908           DemandedSrcElts.setBit(i / Scale);
1909         }
1910 
1911       if (SrcVT.isVector()) {
1912         APInt KnownSrcUndef, KnownSrcZero;
1913         if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1914                                        KnownSrcZero, TLO, Depth + 1))
1915           return true;
1916       }
1917 
1918       KnownBits KnownSrcBits;
1919       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1920                                KnownSrcBits, TLO, Depth + 1))
1921         return true;
1922     }
1923 
1924     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
1925     // recursive call where Known may be useful to the caller.
1926     if (Depth > 0) {
1927       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1928       return false;
1929     }
1930     break;
1931   }
1932   case ISD::ADD:
1933   case ISD::MUL:
1934   case ISD::SUB: {
1935     // Add, Sub, and Mul don't demand any bits in positions beyond that
1936     // of the highest bit demanded of them.
1937     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
1938     SDNodeFlags Flags = Op.getNode()->getFlags();
1939     unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
1940     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
1941     if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
1942                              Depth + 1) ||
1943         SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
1944                              Depth + 1) ||
1945         // See if the operation should be performed at a smaller bit width.
1946         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
1947       if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1948         // Disable the nsw and nuw flags. We can no longer guarantee that we
1949         // won't wrap after simplification.
1950         Flags.setNoSignedWrap(false);
1951         Flags.setNoUnsignedWrap(false);
1952         SDValue NewOp =
1953             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
1954         return TLO.CombineTo(Op, NewOp);
1955       }
1956       return true;
1957     }
1958 
1959     // Attempt to avoid multi-use ops if we don't need anything from them.
1960     if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1961       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1962           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
1963       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1964           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
1965       if (DemandedOp0 || DemandedOp1) {
1966         Flags.setNoSignedWrap(false);
1967         Flags.setNoUnsignedWrap(false);
1968         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1969         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1970         SDValue NewOp =
1971             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
1972         return TLO.CombineTo(Op, NewOp);
1973       }
1974     }
1975 
1976     // If we have a constant operand, we may be able to turn it into -1 if we
1977     // do not demand the high bits. This can make the constant smaller to
1978     // encode, allow more general folding, or match specialized instruction
1979     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
1980     // is probably not useful (and could be detrimental).
1981     ConstantSDNode *C = isConstOrConstSplat(Op1);
1982     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
1983     if (C && !C->isAllOnesValue() && !C->isOne() &&
1984         (C->getAPIntValue() | HighMask).isAllOnesValue()) {
1985       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
1986       // Disable the nsw and nuw flags. We can no longer guarantee that we
1987       // won't wrap after simplification.
1988       Flags.setNoSignedWrap(false);
1989       Flags.setNoUnsignedWrap(false);
1990       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
1991       return TLO.CombineTo(Op, NewOp);
1992     }
1993 
1994     LLVM_FALLTHROUGH;
1995   }
1996   default:
1997     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
1998       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
1999                                             Known, TLO, Depth))
2000         return true;
2001       break;
2002     }
2003 
2004     // Just use computeKnownBits to compute output bits.
2005     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2006     break;
2007   }
2008 
2009   // If we know the value of all of the demanded bits, return this as a
2010   // constant.
2011   if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2012     // Avoid folding to a constant if any OpaqueConstant is involved.
2013     const SDNode *N = Op.getNode();
2014     for (SDNodeIterator I = SDNodeIterator::begin(N),
2015                         E = SDNodeIterator::end(N);
2016          I != E; ++I) {
2017       SDNode *Op = *I;
2018       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2019         if (C->isOpaque())
2020           return false;
2021     }
2022     // TODO: Handle float bits as well.
2023     if (VT.isInteger())
2024       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2025   }
2026 
2027   return false;
2028 }
2029 
2030 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2031                                                 const APInt &DemandedElts,
2032                                                 APInt &KnownUndef,
2033                                                 APInt &KnownZero,
2034                                                 DAGCombinerInfo &DCI) const {
2035   SelectionDAG &DAG = DCI.DAG;
2036   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2037                         !DCI.isBeforeLegalizeOps());
2038 
2039   bool Simplified =
2040       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2041   if (Simplified) {
2042     DCI.AddToWorklist(Op.getNode());
2043     DCI.CommitTargetLoweringOpt(TLO);
2044   }
2045 
2046   return Simplified;
2047 }
2048 
2049 /// Given a vector binary operation and known undefined elements for each input
2050 /// operand, compute whether each element of the output is undefined.
2051 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2052                                          const APInt &UndefOp0,
2053                                          const APInt &UndefOp1) {
2054   EVT VT = BO.getValueType();
2055   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2056          "Vector binop only");
2057 
2058   EVT EltVT = VT.getVectorElementType();
2059   unsigned NumElts = VT.getVectorNumElements();
2060   assert(UndefOp0.getBitWidth() == NumElts &&
2061          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2062 
2063   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2064                                    const APInt &UndefVals) {
2065     if (UndefVals[Index])
2066       return DAG.getUNDEF(EltVT);
2067 
2068     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2069       // Try hard to make sure that the getNode() call is not creating temporary
2070       // nodes. Ignore opaque integers because they do not constant fold.
2071       SDValue Elt = BV->getOperand(Index);
2072       auto *C = dyn_cast<ConstantSDNode>(Elt);
2073       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2074         return Elt;
2075     }
2076 
2077     return SDValue();
2078   };
2079 
2080   APInt KnownUndef = APInt::getNullValue(NumElts);
2081   for (unsigned i = 0; i != NumElts; ++i) {
2082     // If both inputs for this element are either constant or undef and match
2083     // the element type, compute the constant/undef result for this element of
2084     // the vector.
2085     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2086     // not handle FP constants. The code within getNode() should be refactored
2087     // to avoid the danger of creating a bogus temporary node here.
2088     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2089     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2090     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2091       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2092         KnownUndef.setBit(i);
2093   }
2094   return KnownUndef;
2095 }
2096 
2097 bool TargetLowering::SimplifyDemandedVectorElts(
2098     SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2099     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2100     bool AssumeSingleUse) const {
2101   EVT VT = Op.getValueType();
2102   APInt DemandedElts = OriginalDemandedElts;
2103   unsigned NumElts = DemandedElts.getBitWidth();
2104   assert(VT.isVector() && "Expected vector op");
2105   assert(VT.getVectorNumElements() == NumElts &&
2106          "Mask size mismatches value type element count!");
2107 
2108   KnownUndef = KnownZero = APInt::getNullValue(NumElts);
2109 
2110   // Undef operand.
2111   if (Op.isUndef()) {
2112     KnownUndef.setAllBits();
2113     return false;
2114   }
2115 
2116   // If Op has other users, assume that all elements are needed.
2117   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
2118     DemandedElts.setAllBits();
2119 
2120   // Not demanding any elements from Op.
2121   if (DemandedElts == 0) {
2122     KnownUndef.setAllBits();
2123     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2124   }
2125 
2126   // Limit search depth.
2127   if (Depth >= SelectionDAG::MaxRecursionDepth)
2128     return false;
2129 
2130   SDLoc DL(Op);
2131   unsigned EltSizeInBits = VT.getScalarSizeInBits();
2132 
2133   switch (Op.getOpcode()) {
2134   case ISD::SCALAR_TO_VECTOR: {
2135     if (!DemandedElts[0]) {
2136       KnownUndef.setAllBits();
2137       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2138     }
2139     KnownUndef.setHighBits(NumElts - 1);
2140     break;
2141   }
2142   case ISD::BITCAST: {
2143     SDValue Src = Op.getOperand(0);
2144     EVT SrcVT = Src.getValueType();
2145 
2146     // We only handle vectors here.
2147     // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2148     if (!SrcVT.isVector())
2149       break;
2150 
2151     // Fast handling of 'identity' bitcasts.
2152     unsigned NumSrcElts = SrcVT.getVectorNumElements();
2153     if (NumSrcElts == NumElts)
2154       return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2155                                         KnownZero, TLO, Depth + 1);
2156 
2157     APInt SrcZero, SrcUndef;
2158     APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
2159 
2160     // Bitcast from 'large element' src vector to 'small element' vector, we
2161     // must demand a source element if any DemandedElt maps to it.
2162     if ((NumElts % NumSrcElts) == 0) {
2163       unsigned Scale = NumElts / NumSrcElts;
2164       for (unsigned i = 0; i != NumElts; ++i)
2165         if (DemandedElts[i])
2166           SrcDemandedElts.setBit(i / Scale);
2167 
2168       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2169                                      TLO, Depth + 1))
2170         return true;
2171 
2172       // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2173       // of the large element.
2174       // TODO - bigendian once we have test coverage.
2175       if (TLO.DAG.getDataLayout().isLittleEndian()) {
2176         unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2177         APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
2178         for (unsigned i = 0; i != NumElts; ++i)
2179           if (DemandedElts[i]) {
2180             unsigned Ofs = (i % Scale) * EltSizeInBits;
2181             SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2182           }
2183 
2184         KnownBits Known;
2185         if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
2186           return true;
2187       }
2188 
2189       // If the src element is zero/undef then all the output elements will be -
2190       // only demanded elements are guaranteed to be correct.
2191       for (unsigned i = 0; i != NumSrcElts; ++i) {
2192         if (SrcDemandedElts[i]) {
2193           if (SrcZero[i])
2194             KnownZero.setBits(i * Scale, (i + 1) * Scale);
2195           if (SrcUndef[i])
2196             KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2197         }
2198       }
2199     }
2200 
2201     // Bitcast from 'small element' src vector to 'large element' vector, we
2202     // demand all smaller source elements covered by the larger demanded element
2203     // of this vector.
2204     if ((NumSrcElts % NumElts) == 0) {
2205       unsigned Scale = NumSrcElts / NumElts;
2206       for (unsigned i = 0; i != NumElts; ++i)
2207         if (DemandedElts[i])
2208           SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
2209 
2210       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2211                                      TLO, Depth + 1))
2212         return true;
2213 
2214       // If all the src elements covering an output element are zero/undef, then
2215       // the output element will be as well, assuming it was demanded.
2216       for (unsigned i = 0; i != NumElts; ++i) {
2217         if (DemandedElts[i]) {
2218           if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
2219             KnownZero.setBit(i);
2220           if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
2221             KnownUndef.setBit(i);
2222         }
2223       }
2224     }
2225     break;
2226   }
2227   case ISD::BUILD_VECTOR: {
2228     // Check all elements and simplify any unused elements with UNDEF.
2229     if (!DemandedElts.isAllOnesValue()) {
2230       // Don't simplify BROADCASTS.
2231       if (llvm::any_of(Op->op_values(),
2232                        [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2233         SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2234         bool Updated = false;
2235         for (unsigned i = 0; i != NumElts; ++i) {
2236           if (!DemandedElts[i] && !Ops[i].isUndef()) {
2237             Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2238             KnownUndef.setBit(i);
2239             Updated = true;
2240           }
2241         }
2242         if (Updated)
2243           return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
2244       }
2245     }
2246     for (unsigned i = 0; i != NumElts; ++i) {
2247       SDValue SrcOp = Op.getOperand(i);
2248       if (SrcOp.isUndef()) {
2249         KnownUndef.setBit(i);
2250       } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
2251                  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
2252         KnownZero.setBit(i);
2253       }
2254     }
2255     break;
2256   }
2257   case ISD::CONCAT_VECTORS: {
2258     EVT SubVT = Op.getOperand(0).getValueType();
2259     unsigned NumSubVecs = Op.getNumOperands();
2260     unsigned NumSubElts = SubVT.getVectorNumElements();
2261     for (unsigned i = 0; i != NumSubVecs; ++i) {
2262       SDValue SubOp = Op.getOperand(i);
2263       APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
2264       APInt SubUndef, SubZero;
2265       if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
2266                                      Depth + 1))
2267         return true;
2268       KnownUndef.insertBits(SubUndef, i * NumSubElts);
2269       KnownZero.insertBits(SubZero, i * NumSubElts);
2270     }
2271     break;
2272   }
2273   case ISD::INSERT_SUBVECTOR: {
2274     if (!isa<ConstantSDNode>(Op.getOperand(2)))
2275       break;
2276     SDValue Base = Op.getOperand(0);
2277     SDValue Sub = Op.getOperand(1);
2278     EVT SubVT = Sub.getValueType();
2279     unsigned NumSubElts = SubVT.getVectorNumElements();
2280     const APInt &Idx = Op.getConstantOperandAPInt(2);
2281     if (Idx.ugt(NumElts - NumSubElts))
2282       break;
2283     unsigned SubIdx = Idx.getZExtValue();
2284     APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
2285     APInt SubUndef, SubZero;
2286     if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
2287                                    Depth + 1))
2288       return true;
2289     APInt BaseElts = DemandedElts;
2290     BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
2291 
2292     // If none of the base operand elements are demanded, replace it with undef.
2293     if (!BaseElts && !Base.isUndef())
2294       return TLO.CombineTo(Op,
2295                            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
2296                                            TLO.DAG.getUNDEF(VT),
2297                                            Op.getOperand(1),
2298                                            Op.getOperand(2)));
2299 
2300     if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
2301                                    Depth + 1))
2302       return true;
2303     KnownUndef.insertBits(SubUndef, SubIdx);
2304     KnownZero.insertBits(SubZero, SubIdx);
2305     break;
2306   }
2307   case ISD::EXTRACT_SUBVECTOR: {
2308     SDValue Src = Op.getOperand(0);
2309     ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2310     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2311     if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
2312       // Offset the demanded elts by the subvector index.
2313       uint64_t Idx = SubIdx->getZExtValue();
2314       APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
2315       APInt SrcUndef, SrcZero;
2316       if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
2317                                      Depth + 1))
2318         return true;
2319       KnownUndef = SrcUndef.extractBits(NumElts, Idx);
2320       KnownZero = SrcZero.extractBits(NumElts, Idx);
2321     }
2322     break;
2323   }
2324   case ISD::INSERT_VECTOR_ELT: {
2325     SDValue Vec = Op.getOperand(0);
2326     SDValue Scl = Op.getOperand(1);
2327     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2328 
2329     // For a legal, constant insertion index, if we don't need this insertion
2330     // then strip it, else remove it from the demanded elts.
2331     if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
2332       unsigned Idx = CIdx->getZExtValue();
2333       if (!DemandedElts[Idx])
2334         return TLO.CombineTo(Op, Vec);
2335 
2336       APInt DemandedVecElts(DemandedElts);
2337       DemandedVecElts.clearBit(Idx);
2338       if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
2339                                      KnownZero, TLO, Depth + 1))
2340         return true;
2341 
2342       KnownUndef.clearBit(Idx);
2343       if (Scl.isUndef())
2344         KnownUndef.setBit(Idx);
2345 
2346       KnownZero.clearBit(Idx);
2347       if (isNullConstant(Scl) || isNullFPConstant(Scl))
2348         KnownZero.setBit(Idx);
2349       break;
2350     }
2351 
2352     APInt VecUndef, VecZero;
2353     if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
2354                                    Depth + 1))
2355       return true;
2356     // Without knowing the insertion index we can't set KnownUndef/KnownZero.
2357     break;
2358   }
2359   case ISD::VSELECT: {
2360     // Try to transform the select condition based on the current demanded
2361     // elements.
2362     // TODO: If a condition element is undef, we can choose from one arm of the
2363     //       select (and if one arm is undef, then we can propagate that to the
2364     //       result).
2365     // TODO - add support for constant vselect masks (see IR version of this).
2366     APInt UnusedUndef, UnusedZero;
2367     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
2368                                    UnusedZero, TLO, Depth + 1))
2369       return true;
2370 
2371     // See if we can simplify either vselect operand.
2372     APInt DemandedLHS(DemandedElts);
2373     APInt DemandedRHS(DemandedElts);
2374     APInt UndefLHS, ZeroLHS;
2375     APInt UndefRHS, ZeroRHS;
2376     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
2377                                    ZeroLHS, TLO, Depth + 1))
2378       return true;
2379     if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
2380                                    ZeroRHS, TLO, Depth + 1))
2381       return true;
2382 
2383     KnownUndef = UndefLHS & UndefRHS;
2384     KnownZero = ZeroLHS & ZeroRHS;
2385     break;
2386   }
2387   case ISD::VECTOR_SHUFFLE: {
2388     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
2389 
2390     // Collect demanded elements from shuffle operands..
2391     APInt DemandedLHS(NumElts, 0);
2392     APInt DemandedRHS(NumElts, 0);
2393     for (unsigned i = 0; i != NumElts; ++i) {
2394       int M = ShuffleMask[i];
2395       if (M < 0 || !DemandedElts[i])
2396         continue;
2397       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
2398       if (M < (int)NumElts)
2399         DemandedLHS.setBit(M);
2400       else
2401         DemandedRHS.setBit(M - NumElts);
2402     }
2403 
2404     // See if we can simplify either shuffle operand.
2405     APInt UndefLHS, ZeroLHS;
2406     APInt UndefRHS, ZeroRHS;
2407     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
2408                                    ZeroLHS, TLO, Depth + 1))
2409       return true;
2410     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
2411                                    ZeroRHS, TLO, Depth + 1))
2412       return true;
2413 
2414     // Simplify mask using undef elements from LHS/RHS.
2415     bool Updated = false;
2416     bool IdentityLHS = true, IdentityRHS = true;
2417     SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
2418     for (unsigned i = 0; i != NumElts; ++i) {
2419       int &M = NewMask[i];
2420       if (M < 0)
2421         continue;
2422       if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
2423           (M >= (int)NumElts && UndefRHS[M - NumElts])) {
2424         Updated = true;
2425         M = -1;
2426       }
2427       IdentityLHS &= (M < 0) || (M == (int)i);
2428       IdentityRHS &= (M < 0) || ((M - NumElts) == i);
2429     }
2430 
2431     // Update legal shuffle masks based on demanded elements if it won't reduce
2432     // to Identity which can cause premature removal of the shuffle mask.
2433     if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
2434       SDValue LegalShuffle =
2435           buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
2436                                   NewMask, TLO.DAG);
2437       if (LegalShuffle)
2438         return TLO.CombineTo(Op, LegalShuffle);
2439     }
2440 
2441     // Propagate undef/zero elements from LHS/RHS.
2442     for (unsigned i = 0; i != NumElts; ++i) {
2443       int M = ShuffleMask[i];
2444       if (M < 0) {
2445         KnownUndef.setBit(i);
2446       } else if (M < (int)NumElts) {
2447         if (UndefLHS[M])
2448           KnownUndef.setBit(i);
2449         if (ZeroLHS[M])
2450           KnownZero.setBit(i);
2451       } else {
2452         if (UndefRHS[M - NumElts])
2453           KnownUndef.setBit(i);
2454         if (ZeroRHS[M - NumElts])
2455           KnownZero.setBit(i);
2456       }
2457     }
2458     break;
2459   }
2460   case ISD::ANY_EXTEND_VECTOR_INREG:
2461   case ISD::SIGN_EXTEND_VECTOR_INREG:
2462   case ISD::ZERO_EXTEND_VECTOR_INREG: {
2463     APInt SrcUndef, SrcZero;
2464     SDValue Src = Op.getOperand(0);
2465     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2466     APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
2467     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2468                                    Depth + 1))
2469       return true;
2470     KnownZero = SrcZero.zextOrTrunc(NumElts);
2471     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
2472 
2473     if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
2474         Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
2475         DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
2476       // aext - if we just need the bottom element then we can bitcast.
2477       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2478     }
2479 
2480     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
2481       // zext(undef) upper bits are guaranteed to be zero.
2482       if (DemandedElts.isSubsetOf(KnownUndef))
2483         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2484       KnownUndef.clearAllBits();
2485     }
2486     break;
2487   }
2488 
2489   // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
2490   // MAX, saturated math, etc.
2491   case ISD::OR:
2492   case ISD::XOR:
2493   case ISD::ADD:
2494   case ISD::SUB:
2495   case ISD::FADD:
2496   case ISD::FSUB:
2497   case ISD::FMUL:
2498   case ISD::FDIV:
2499   case ISD::FREM: {
2500     APInt UndefRHS, ZeroRHS;
2501     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
2502                                    ZeroRHS, TLO, Depth + 1))
2503       return true;
2504     APInt UndefLHS, ZeroLHS;
2505     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
2506                                    ZeroLHS, TLO, Depth + 1))
2507       return true;
2508 
2509     KnownZero = ZeroLHS & ZeroRHS;
2510     KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
2511     break;
2512   }
2513   case ISD::SHL:
2514   case ISD::SRL:
2515   case ISD::SRA:
2516   case ISD::ROTL:
2517   case ISD::ROTR: {
2518     APInt UndefRHS, ZeroRHS;
2519     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
2520                                    ZeroRHS, TLO, Depth + 1))
2521       return true;
2522     APInt UndefLHS, ZeroLHS;
2523     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
2524                                    ZeroLHS, TLO, Depth + 1))
2525       return true;
2526 
2527     KnownZero = ZeroLHS;
2528     KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
2529     break;
2530   }
2531   case ISD::MUL:
2532   case ISD::AND: {
2533     APInt SrcUndef, SrcZero;
2534     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
2535                                    SrcZero, TLO, Depth + 1))
2536       return true;
2537     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2538                                    KnownZero, TLO, Depth + 1))
2539       return true;
2540 
2541     // If either side has a zero element, then the result element is zero, even
2542     // if the other is an UNDEF.
2543     // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
2544     // and then handle 'and' nodes with the rest of the binop opcodes.
2545     KnownZero |= SrcZero;
2546     KnownUndef &= SrcUndef;
2547     KnownUndef &= ~KnownZero;
2548     break;
2549   }
2550   case ISD::TRUNCATE:
2551   case ISD::SIGN_EXTEND:
2552   case ISD::ZERO_EXTEND:
2553     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2554                                    KnownZero, TLO, Depth + 1))
2555       return true;
2556 
2557     if (Op.getOpcode() == ISD::ZERO_EXTEND) {
2558       // zext(undef) upper bits are guaranteed to be zero.
2559       if (DemandedElts.isSubsetOf(KnownUndef))
2560         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2561       KnownUndef.clearAllBits();
2562     }
2563     break;
2564   default: {
2565     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2566       if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
2567                                                   KnownZero, TLO, Depth))
2568         return true;
2569     } else {
2570       KnownBits Known;
2571       APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
2572       if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
2573                                TLO, Depth, AssumeSingleUse))
2574         return true;
2575     }
2576     break;
2577   }
2578   }
2579   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
2580 
2581   // Constant fold all undef cases.
2582   // TODO: Handle zero cases as well.
2583   if (DemandedElts.isSubsetOf(KnownUndef))
2584     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2585 
2586   return false;
2587 }
2588 
2589 /// Determine which of the bits specified in Mask are known to be either zero or
2590 /// one and return them in the Known.
2591 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2592                                                    KnownBits &Known,
2593                                                    const APInt &DemandedElts,
2594                                                    const SelectionDAG &DAG,
2595                                                    unsigned Depth) const {
2596   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2597           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2598           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2599           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2600          "Should use MaskedValueIsZero if you don't know whether Op"
2601          " is a target node!");
2602   Known.resetAll();
2603 }
2604 
/// GlobalISel hook for reporting known bits of a target-specific generic
/// instruction producing register \p R. The base implementation knows nothing
/// about any target instruction, so it conservatively clears all known bits.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  Known.resetAll();
}
2611 
2612 void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
2613                                                    KnownBits &Known,
2614                                                    const APInt &DemandedElts,
2615                                                    const SelectionDAG &DAG,
2616                                                    unsigned Depth) const {
2617   assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
2618 
2619   if (unsigned Align = DAG.InferPtrAlignment(Op)) {
2620     // The low bits are known zero if the pointer is aligned.
2621     Known.Zero.setLowBits(Log2_32(Align));
2622   }
2623 }
2624 
2625 /// This method can be implemented by targets that want to expose additional
2626 /// information about sign bits to the DAG Combiner.
2627 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2628                                                          const APInt &,
2629                                                          const SelectionDAG &,
2630                                                          unsigned Depth) const {
2631   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2632           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2633           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2634           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2635          "Should use ComputeNumSignBits if you don't know whether Op"
2636          " is a target node!");
2637   return 1;
2638 }
2639 
2640 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
2641     SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
2642     TargetLoweringOpt &TLO, unsigned Depth) const {
2643   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2644           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2645           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2646           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2647          "Should use SimplifyDemandedVectorElts if you don't know whether Op"
2648          " is a target node!");
2649   return false;
2650 }
2651 
2652 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
2653     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2654     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
2655   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2656           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2657           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2658           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2659          "Should use SimplifyDemandedBits if you don't know whether Op"
2660          " is a target node!");
2661   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
2662   return false;
2663 }
2664 
2665 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
2666     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2667     SelectionDAG &DAG, unsigned Depth) const {
2668   assert(
2669       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2670        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2671        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2672        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2673       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
2674       " is a target node!");
2675   return SDValue();
2676 }
2677 
2678 SDValue
2679 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
2680                                         SDValue N1, MutableArrayRef<int> Mask,
2681                                         SelectionDAG &DAG) const {
2682   bool LegalMask = isShuffleMaskLegal(Mask, VT);
2683   if (!LegalMask) {
2684     std::swap(N0, N1);
2685     ShuffleVectorSDNode::commuteMask(Mask);
2686     LegalMask = isShuffleMaskLegal(Mask, VT);
2687   }
2688 
2689   if (!LegalMask)
2690     return SDValue();
2691 
2692   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
2693 }
2694 
/// Base implementation: no Constant can be recovered from a target-specific
/// load node by default; targets override this to expose constant-pool loads.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
2698 
2699 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
2700                                                   const SelectionDAG &DAG,
2701                                                   bool SNaN,
2702                                                   unsigned Depth) const {
2703   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2704           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2705           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2706           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2707          "Should use isKnownNeverNaN if you don't know whether Op"
2708          " is a target node!");
2709   return false;
2710 }
2711 
2712 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
2713 // work with truncating build vectors and vectors with elements of less than
2714 // 8 bits.
2715 bool TargetLowering::isConstTrueVal(const SDNode *N) const {
2716   if (!N)
2717     return false;
2718 
2719   APInt CVal;
2720   if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2721     CVal = CN->getAPIntValue();
2722   } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
2723     auto *CN = BV->getConstantSplatNode();
2724     if (!CN)
2725       return false;
2726 
2727     // If this is a truncating build vector, truncate the splat value.
2728     // Otherwise, we may fail to match the expected values below.
2729     unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
2730     CVal = CN->getAPIntValue();
2731     if (BVEltWidth < CVal.getBitWidth())
2732       CVal = CVal.trunc(BVEltWidth);
2733   } else {
2734     return false;
2735   }
2736 
2737   switch (getBooleanContents(N->getValueType(0))) {
2738   case UndefinedBooleanContent:
2739     return CVal[0];
2740   case ZeroOrOneBooleanContent:
2741     return CVal.isOneValue();
2742   case ZeroOrNegativeOneBooleanContent:
2743     return CVal.isAllOnesValue();
2744   }
2745 
2746   llvm_unreachable("Invalid boolean contents");
2747 }
2748 
2749 bool TargetLowering::isConstFalseVal(const SDNode *N) const {
2750   if (!N)
2751     return false;
2752 
2753   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
2754   if (!CN) {
2755     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
2756     if (!BV)
2757       return false;
2758 
2759     // Only interested in constant splats, we don't care about undef
2760     // elements in identifying boolean constants and getConstantSplatNode
2761     // returns NULL if all ops are undef;
2762     CN = BV->getConstantSplatNode();
2763     if (!CN)
2764       return false;
2765   }
2766 
2767   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
2768     return !CN->getAPIntValue()[0];
2769 
2770   return CN->isNullValue();
2771 }
2772 
2773 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
2774                                        bool SExt) const {
2775   if (VT == MVT::i1)
2776     return N->isOne();
2777 
2778   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
2779   switch (Cnt) {
2780   case TargetLowering::ZeroOrOneBooleanContent:
2781     // An extended value of 1 is always true, unless its original type is i1,
2782     // in which case it will be sign extended to -1.
2783     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
2784   case TargetLowering::UndefinedBooleanContent:
2785   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2786     return N->isAllOnesValue() && SExt;
2787   }
2788   llvm_unreachable("Unexpected enumeration.");
2789 }
2790 
2791 /// This helper function of SimplifySetCC tries to optimize the comparison when
2792 /// either operand of the SetCC node is a bitwise-and instruction.
2793 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
2794                                          ISD::CondCode Cond, const SDLoc &DL,
2795                                          DAGCombinerInfo &DCI) const {
2796   // Match these patterns in any of their permutations:
2797   // (X & Y) == Y
2798   // (X & Y) != Y
2799   if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
2800     std::swap(N0, N1);
2801 
2802   EVT OpVT = N0.getValueType();
2803   if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
2804       (Cond != ISD::SETEQ && Cond != ISD::SETNE))
2805     return SDValue();
2806 
2807   SDValue X, Y;
2808   if (N0.getOperand(0) == N1) {
2809     X = N0.getOperand(1);
2810     Y = N0.getOperand(0);
2811   } else if (N0.getOperand(1) == N1) {
2812     X = N0.getOperand(0);
2813     Y = N0.getOperand(1);
2814   } else {
2815     return SDValue();
2816   }
2817 
2818   SelectionDAG &DAG = DCI.DAG;
2819   SDValue Zero = DAG.getConstant(0, DL, OpVT);
2820   if (DAG.isKnownToBeAPowerOfTwo(Y)) {
2821     // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
2822     // Note that where Y is variable and is known to have at most one bit set
2823     // (for example, if it is Z & 1) we cannot do this; the expressions are not
2824     // equivalent when Y == 0.
2825     assert(OpVT.isInteger());
2826     Cond = ISD::getSetCCInverse(Cond, OpVT);
2827     if (DCI.isBeforeLegalizeOps() ||
2828         isCondCodeLegal(Cond, N0.getSimpleValueType()))
2829       return DAG.getSetCC(DL, VT, N0, Zero, Cond);
2830   } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
2831     // If the target supports an 'and-not' or 'and-complement' logic operation,
2832     // try to use that to make a comparison operation more efficient.
2833     // But don't do this transform if the mask is a single bit because there are
2834     // more efficient ways to deal with that case (for example, 'bt' on x86 or
2835     // 'rlwinm' on PPC).
2836 
2837     // Bail out if the compare operand that we want to turn into a zero is
2838     // already a zero (otherwise, infinite loop).
2839     auto *YConst = dyn_cast<ConstantSDNode>(Y);
2840     if (YConst && YConst->isNullValue())
2841       return SDValue();
2842 
2843     // Transform this into: ~X & Y == 0.
2844     SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
2845     SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
2846     return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
2847   }
2848 
2849   return SDValue();
2850 }
2851 
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned comparison onto an eq/ne check. SETULE/SETUGT are first
  // turned into the SETULT/SETUGE forms by bumping the compared constant.
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // The pattern only matches when the 'add' constant is exactly half the
  // setcc constant, i.e. 1 << (KeptBits-1) vs 1 << KeptBits.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into:  ((%x << C) a>> C) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);

  return T2;
}
2949 
2950 // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
2951 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
2952     EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
2953     DAGCombinerInfo &DCI, const SDLoc &DL) const {
2954   assert(isConstOrConstSplat(N1C) &&
2955          isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
2956          "Should be a comparison with 0.");
2957   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2958          "Valid only for [in]equality comparisons.");
2959 
2960   unsigned NewShiftOpcode;
2961   SDValue X, C, Y;
2962 
2963   SelectionDAG &DAG = DCI.DAG;
2964   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2965 
2966   // Look for '(C l>>/<< Y)'.
2967   auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
2968     // The shift should be one-use.
2969     if (!V.hasOneUse())
2970       return false;
2971     unsigned OldShiftOpcode = V.getOpcode();
2972     switch (OldShiftOpcode) {
2973     case ISD::SHL:
2974       NewShiftOpcode = ISD::SRL;
2975       break;
2976     case ISD::SRL:
2977       NewShiftOpcode = ISD::SHL;
2978       break;
2979     default:
2980       return false; // must be a logical shift.
2981     }
2982     // We should be shifting a constant.
2983     // FIXME: best to use isConstantOrConstantVector().
2984     C = V.getOperand(0);
2985     ConstantSDNode *CC =
2986         isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
2987     if (!CC)
2988       return false;
2989     Y = V.getOperand(1);
2990 
2991     ConstantSDNode *XC =
2992         isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
2993     return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
2994         X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
2995   };
2996 
2997   // LHS of comparison should be an one-use 'and'.
2998   if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
2999     return SDValue();
3000 
3001   X = N0.getOperand(0);
3002   SDValue Mask = N0.getOperand(1);
3003 
3004   // 'and' is commutative!
3005   if (!Match(Mask)) {
3006     std::swap(X, Mask);
3007     if (!Match(Mask))
3008       return SDValue();
3009   }
3010 
3011   EVT VT = X.getValueType();
3012 
3013   // Produce:
3014   // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3015   SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3016   SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3017   SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3018   return T2;
3019 }
3020 
3021 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3022 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3023 /// handle the commuted versions of these patterns.
3024 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3025                                            ISD::CondCode Cond, const SDLoc &DL,
3026                                            DAGCombinerInfo &DCI) const {
3027   unsigned BOpcode = N0.getOpcode();
3028   assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3029          "Unexpected binop");
3030   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3031 
3032   // (X + Y) == X --> Y == 0
3033   // (X - Y) == X --> Y == 0
3034   // (X ^ Y) == X --> Y == 0
3035   SelectionDAG &DAG = DCI.DAG;
3036   EVT OpVT = N0.getValueType();
3037   SDValue X = N0.getOperand(0);
3038   SDValue Y = N0.getOperand(1);
3039   if (X == N1)
3040     return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3041 
3042   if (Y != N1)
3043     return SDValue();
3044 
3045   // (X + Y) == Y --> X == 0
3046   // (X ^ Y) == Y --> X == 0
3047   if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3048     return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3049 
3050   // The shift would not be valid if the operands are boolean (i1).
3051   if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3052     return SDValue();
3053 
3054   // (X - Y) == Y --> X == Y << 1
3055   EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3056                                  !DCI.isBeforeLegalize());
3057   SDValue One = DAG.getConstant(1, DL, ShiftVT);
3058   SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3059   if (!DCI.isCalledByLegalizer())
3060     DCI.AddToWorklist(YShl1.getNode());
3061   return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3062 }
3063 
3064 /// Try to simplify a setcc built with the specified operands and cc. If it is
3065 /// unable to simplify it, return a null SDValue.
3066 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
3067                                       ISD::CondCode Cond, bool foldBooleans,
3068                                       DAGCombinerInfo &DCI,
3069                                       const SDLoc &dl) const {
3070   SelectionDAG &DAG = DCI.DAG;
3071   const DataLayout &Layout = DAG.getDataLayout();
3072   EVT OpVT = N0.getValueType();
3073 
3074   // Constant fold or commute setcc.
3075   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
3076     return Fold;
3077 
3078   // Ensure that the constant occurs on the RHS and fold constant comparisons.
3079   // TODO: Handle non-splat vector constants. All undef causes trouble.
3080   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
3081   if (isConstOrConstSplat(N0) &&
3082       (DCI.isBeforeLegalizeOps() ||
3083        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
3084     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3085 
3086   // If we have a subtract with the same 2 non-constant operands as this setcc
3087   // -- but in reverse order -- then try to commute the operands of this setcc
3088   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
3089   // instruction on some targets.
3090   if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
3091       (DCI.isBeforeLegalizeOps() ||
3092        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
3093       DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
3094       !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
3095     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3096 
3097   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3098     const APInt &C1 = N1C->getAPIntValue();
3099 
3100     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
3101     // equality comparison, then we're just comparing whether X itself is
3102     // zero.
3103     if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
3104         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
3105         N0.getOperand(1).getOpcode() == ISD::Constant) {
3106       const APInt &ShAmt = N0.getConstantOperandAPInt(1);
3107       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3108           ShAmt == Log2_32(N0.getValueSizeInBits())) {
3109         if ((C1 == 0) == (Cond == ISD::SETEQ)) {
3110           // (srl (ctlz x), 5) == 0  -> X != 0
3111           // (srl (ctlz x), 5) != 1  -> X != 0
3112           Cond = ISD::SETNE;
3113         } else {
3114           // (srl (ctlz x), 5) != 0  -> X == 0
3115           // (srl (ctlz x), 5) == 1  -> X == 0
3116           Cond = ISD::SETEQ;
3117         }
3118         SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
3119         return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
3120                             Zero, Cond);
3121       }
3122     }
3123 
3124     SDValue CTPOP = N0;
3125     // Look through truncs that don't change the value of a ctpop.
3126     if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
3127       CTPOP = N0.getOperand(0);
3128 
3129     if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
3130         (N0 == CTPOP ||
3131          N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
3132       EVT CTVT = CTPOP.getValueType();
3133       SDValue CTOp = CTPOP.getOperand(0);
3134 
3135       // (ctpop x) u< 2 -> (x & x-1) == 0
3136       // (ctpop x) u> 1 -> (x & x-1) != 0
3137       if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
3138         SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3139         SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3140         SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3141         ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
3142         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
3143       }
3144 
3145       // If ctpop is not supported, expand a power-of-2 comparison based on it.
3146       if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
3147           (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3148         // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
3149         // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
3150         SDValue Zero = DAG.getConstant(0, dl, CTVT);
3151         SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3152         assert(CTVT.isInteger());
3153         ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
3154         SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3155         SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3156         SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
3157         SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
3158         unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
3159         return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
3160       }
3161     }
3162 
3163     // (zext x) == C --> x == (trunc C)
3164     // (sext x) == C --> x == (trunc C)
3165     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3166         DCI.isBeforeLegalize() && N0->hasOneUse()) {
3167       unsigned MinBits = N0.getValueSizeInBits();
3168       SDValue PreExt;
3169       bool Signed = false;
3170       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
3171         // ZExt
3172         MinBits = N0->getOperand(0).getValueSizeInBits();
3173         PreExt = N0->getOperand(0);
3174       } else if (N0->getOpcode() == ISD::AND) {
3175         // DAGCombine turns costly ZExts into ANDs
3176         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
3177           if ((C->getAPIntValue()+1).isPowerOf2()) {
3178             MinBits = C->getAPIntValue().countTrailingOnes();
3179             PreExt = N0->getOperand(0);
3180           }
3181       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
3182         // SExt
3183         MinBits = N0->getOperand(0).getValueSizeInBits();
3184         PreExt = N0->getOperand(0);
3185         Signed = true;
3186       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
3187         // ZEXTLOAD / SEXTLOAD
3188         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
3189           MinBits = LN0->getMemoryVT().getSizeInBits();
3190           PreExt = N0;
3191         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
3192           Signed = true;
3193           MinBits = LN0->getMemoryVT().getSizeInBits();
3194           PreExt = N0;
3195         }
3196       }
3197 
3198       // Figure out how many bits we need to preserve this constant.
3199       unsigned ReqdBits = Signed ?
3200         C1.getBitWidth() - C1.getNumSignBits() + 1 :
3201         C1.getActiveBits();
3202 
3203       // Make sure we're not losing bits from the constant.
3204       if (MinBits > 0 &&
3205           MinBits < C1.getBitWidth() &&
3206           MinBits >= ReqdBits) {
3207         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
3208         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
3209           // Will get folded away.
3210           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
3211           if (MinBits == 1 && C1 == 1)
3212             // Invert the condition.
3213             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
3214                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3215           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
3216           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
3217         }
3218 
3219         // If truncating the setcc operands is not desirable, we can still
3220         // simplify the expression in some cases:
3221         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
3222         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
3223         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
3224         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
3225         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
3226         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
3227         SDValue TopSetCC = N0->getOperand(0);
3228         unsigned N0Opc = N0->getOpcode();
3229         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
3230         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
3231             TopSetCC.getOpcode() == ISD::SETCC &&
3232             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
3233             (isConstFalseVal(N1C) ||
3234              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
3235 
3236           bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
3237                          (!N1C->isNullValue() && Cond == ISD::SETNE);
3238 
3239           if (!Inverse)
3240             return TopSetCC;
3241 
3242           ISD::CondCode InvCond = ISD::getSetCCInverse(
3243               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
3244               TopSetCC.getOperand(0).getValueType());
3245           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
3246                                       TopSetCC.getOperand(1),
3247                                       InvCond);
3248         }
3249       }
3250     }
3251 
3252     // If the LHS is '(and load, const)', the RHS is 0, the test is for
3253     // equality or unsigned, and all 1 bits of the const are in the same
3254     // partial word, see if we can shorten the load.
3255     if (DCI.isBeforeLegalize() &&
3256         !ISD::isSignedIntSetCC(Cond) &&
3257         N0.getOpcode() == ISD::AND && C1 == 0 &&
3258         N0.getNode()->hasOneUse() &&
3259         isa<LoadSDNode>(N0.getOperand(0)) &&
3260         N0.getOperand(0).getNode()->hasOneUse() &&
3261         isa<ConstantSDNode>(N0.getOperand(1))) {
3262       LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
3263       APInt bestMask;
3264       unsigned bestWidth = 0, bestOffset = 0;
3265       if (Lod->isSimple() && Lod->isUnindexed()) {
3266         unsigned origWidth = N0.getValueSizeInBits();
3267         unsigned maskWidth = origWidth;
3268         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
3269         // 8 bits, but have to be careful...
3270         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
3271           origWidth = Lod->getMemoryVT().getSizeInBits();
3272         const APInt &Mask = N0.getConstantOperandAPInt(1);
3273         for (unsigned width = origWidth / 2; width>=8; width /= 2) {
3274           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
3275           for (unsigned offset=0; offset<origWidth/width; offset++) {
3276             if (Mask.isSubsetOf(newMask)) {
3277               if (Layout.isLittleEndian())
3278                 bestOffset = (uint64_t)offset * (width/8);
3279               else
3280                 bestOffset = (origWidth/width - offset - 1) * (width/8);
3281               bestMask = Mask.lshr(offset * (width/8) * 8);
3282               bestWidth = width;
3283               break;
3284             }
3285             newMask <<= width;
3286           }
3287         }
3288       }
3289       if (bestWidth) {
3290         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
3291         if (newVT.isRound() &&
3292             shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
3293           SDValue Ptr = Lod->getBasePtr();
3294           if (bestOffset != 0)
3295             Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl);
3296           unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
3297           SDValue NewLoad = DAG.getLoad(
3298               newVT, dl, Lod->getChain(), Ptr,
3299               Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
3300           return DAG.getSetCC(dl, VT,
3301                               DAG.getNode(ISD::AND, dl, newVT, NewLoad,
3302                                       DAG.getConstant(bestMask.trunc(bestWidth),
3303                                                       dl, newVT)),
3304                               DAG.getConstant(0LL, dl, newVT), Cond);
3305         }
3306       }
3307     }
3308 
3309     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
3310     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
3311       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
3312 
3313       // If the comparison constant has bits in the upper part, the
3314       // zero-extended value could never match.
3315       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
3316                                               C1.getBitWidth() - InSize))) {
3317         switch (Cond) {
3318         case ISD::SETUGT:
3319         case ISD::SETUGE:
3320         case ISD::SETEQ:
3321           return DAG.getConstant(0, dl, VT);
3322         case ISD::SETULT:
3323         case ISD::SETULE:
3324         case ISD::SETNE:
3325           return DAG.getConstant(1, dl, VT);
3326         case ISD::SETGT:
3327         case ISD::SETGE:
3328           // True if the sign bit of C1 is set.
3329           return DAG.getConstant(C1.isNegative(), dl, VT);
3330         case ISD::SETLT:
3331         case ISD::SETLE:
3332           // True if the sign bit of C1 isn't set.
3333           return DAG.getConstant(C1.isNonNegative(), dl, VT);
3334         default:
3335           break;
3336         }
3337       }
3338 
3339       // Otherwise, we can perform the comparison with the low bits.
3340       switch (Cond) {
3341       case ISD::SETEQ:
3342       case ISD::SETNE:
3343       case ISD::SETUGT:
3344       case ISD::SETUGE:
3345       case ISD::SETULT:
3346       case ISD::SETULE: {
3347         EVT newVT = N0.getOperand(0).getValueType();
3348         if (DCI.isBeforeLegalizeOps() ||
3349             (isOperationLegal(ISD::SETCC, newVT) &&
3350              isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
3351           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
3352           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
3353 
3354           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
3355                                           NewConst, Cond);
3356           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
3357         }
3358         break;
3359       }
3360       default:
3361         break; // todo, be more careful with signed comparisons
3362       }
3363     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3364                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3365       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
3366       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
3367       EVT ExtDstTy = N0.getValueType();
3368       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
3369 
3370       // If the constant doesn't fit into the number of bits for the source of
3371       // the sign extension, it is impossible for both sides to be equal.
3372       if (C1.getMinSignedBits() > ExtSrcTyBits)
3373         return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
3374 
3375       SDValue ZextOp;
3376       EVT Op0Ty = N0.getOperand(0).getValueType();
3377       if (Op0Ty == ExtSrcTy) {
3378         ZextOp = N0.getOperand(0);
3379       } else {
3380         APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
3381         ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
3382                              DAG.getConstant(Imm, dl, Op0Ty));
3383       }
3384       if (!DCI.isCalledByLegalizer())
3385         DCI.AddToWorklist(ZextOp.getNode());
3386       // Otherwise, make this a use of a zext.
3387       return DAG.getSetCC(dl, VT, ZextOp,
3388                           DAG.getConstant(C1 & APInt::getLowBitsSet(
3389                                                               ExtDstTyBits,
3390                                                               ExtSrcTyBits),
3391                                           dl, ExtDstTy),
3392                           Cond);
3393     } else if ((N1C->isNullValue() || N1C->isOne()) &&
3394                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3395       // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
3396       if (N0.getOpcode() == ISD::SETCC &&
3397           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
3398           (N0.getValueType() == MVT::i1 ||
3399            getBooleanContents(N0.getOperand(0).getValueType()) ==
3400                        ZeroOrOneBooleanContent)) {
3401         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
3402         if (TrueWhenTrue)
3403           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
3404         // Invert the condition.
3405         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
3406         CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
3407         if (DCI.isBeforeLegalizeOps() ||
3408             isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
3409           return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
3410       }
3411 
3412       if ((N0.getOpcode() == ISD::XOR ||
3413            (N0.getOpcode() == ISD::AND &&
3414             N0.getOperand(0).getOpcode() == ISD::XOR &&
3415             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
3416           isa<ConstantSDNode>(N0.getOperand(1)) &&
3417           cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
3418         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
3419         // can only do this if the top bits are known zero.
3420         unsigned BitWidth = N0.getValueSizeInBits();
3421         if (DAG.MaskedValueIsZero(N0,
3422                                   APInt::getHighBitsSet(BitWidth,
3423                                                         BitWidth-1))) {
3424           // Okay, get the un-inverted input value.
3425           SDValue Val;
3426           if (N0.getOpcode() == ISD::XOR) {
3427             Val = N0.getOperand(0);
3428           } else {
3429             assert(N0.getOpcode() == ISD::AND &&
3430                     N0.getOperand(0).getOpcode() == ISD::XOR);
3431             // ((X^1)&1)^1 -> X & 1
3432             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
3433                               N0.getOperand(0).getOperand(0),
3434                               N0.getOperand(1));
3435           }
3436 
3437           return DAG.getSetCC(dl, VT, Val, N1,
3438                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3439         }
3440       } else if (N1C->isOne()) {
3441         SDValue Op0 = N0;
3442         if (Op0.getOpcode() == ISD::TRUNCATE)
3443           Op0 = Op0.getOperand(0);
3444 
3445         if ((Op0.getOpcode() == ISD::XOR) &&
3446             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
3447             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
3448           SDValue XorLHS = Op0.getOperand(0);
3449           SDValue XorRHS = Op0.getOperand(1);
3450           // Ensure that the input setccs return an i1 type or 0/1 value.
3451           if (Op0.getValueType() == MVT::i1 ||
3452               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
3453                       ZeroOrOneBooleanContent &&
3454                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
3455                         ZeroOrOneBooleanContent)) {
3456             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
3457             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
3458             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
3459           }
3460         }
3461         if (Op0.getOpcode() == ISD::AND &&
3462             isa<ConstantSDNode>(Op0.getOperand(1)) &&
3463             cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
3464           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
3465           if (Op0.getValueType().bitsGT(VT))
3466             Op0 = DAG.getNode(ISD::AND, dl, VT,
3467                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
3468                           DAG.getConstant(1, dl, VT));
3469           else if (Op0.getValueType().bitsLT(VT))
3470             Op0 = DAG.getNode(ISD::AND, dl, VT,
3471                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
3472                         DAG.getConstant(1, dl, VT));
3473 
3474           return DAG.getSetCC(dl, VT, Op0,
3475                               DAG.getConstant(0, dl, Op0.getValueType()),
3476                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3477         }
3478         if (Op0.getOpcode() == ISD::AssertZext &&
3479             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
3480           return DAG.getSetCC(dl, VT, Op0,
3481                               DAG.getConstant(0, dl, Op0.getValueType()),
3482                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3483       }
3484     }
3485 
3486     // Given:
3487     //   icmp eq/ne (urem %x, %y), 0
3488     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
3489     //   icmp eq/ne %x, 0
3490     if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
3491         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3492       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
3493       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
3494       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
3495         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
3496     }
3497 
3498     if (SDValue V =
3499             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
3500       return V;
3501   }
3502 
3503   // These simplifications apply to splat vectors as well.
3504   // TODO: Handle more splat vector cases.
3505   if (auto *N1C = isConstOrConstSplat(N1)) {
3506     const APInt &C1 = N1C->getAPIntValue();
3507 
3508     APInt MinVal, MaxVal;
3509     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
3510     if (ISD::isSignedIntSetCC(Cond)) {
3511       MinVal = APInt::getSignedMinValue(OperandBitSize);
3512       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
3513     } else {
3514       MinVal = APInt::getMinValue(OperandBitSize);
3515       MaxVal = APInt::getMaxValue(OperandBitSize);
3516     }
3517 
3518     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
3519     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
3520       // X >= MIN --> true
3521       if (C1 == MinVal)
3522         return DAG.getBoolConstant(true, dl, VT, OpVT);
3523 
3524       if (!VT.isVector()) { // TODO: Support this for vectors.
3525         // X >= C0 --> X > (C0 - 1)
3526         APInt C = C1 - 1;
3527         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
3528         if ((DCI.isBeforeLegalizeOps() ||
3529              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3530             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3531                                   isLegalICmpImmediate(C.getSExtValue())))) {
3532           return DAG.getSetCC(dl, VT, N0,
3533                               DAG.getConstant(C, dl, N1.getValueType()),
3534                               NewCC);
3535         }
3536       }
3537     }
3538 
3539     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
3540       // X <= MAX --> true
3541       if (C1 == MaxVal)
3542         return DAG.getBoolConstant(true, dl, VT, OpVT);
3543 
3544       // X <= C0 --> X < (C0 + 1)
3545       if (!VT.isVector()) { // TODO: Support this for vectors.
3546         APInt C = C1 + 1;
3547         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
3548         if ((DCI.isBeforeLegalizeOps() ||
3549              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3550             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3551                                   isLegalICmpImmediate(C.getSExtValue())))) {
3552           return DAG.getSetCC(dl, VT, N0,
3553                               DAG.getConstant(C, dl, N1.getValueType()),
3554                               NewCC);
3555         }
3556       }
3557     }
3558 
3559     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
3560       if (C1 == MinVal)
3561         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
3562 
3563       // TODO: Support this for vectors after legalize ops.
3564       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3565         // Canonicalize setlt X, Max --> setne X, Max
3566         if (C1 == MaxVal)
3567           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3568 
3569         // If we have setult X, 1, turn it into seteq X, 0
3570         if (C1 == MinVal+1)
3571           return DAG.getSetCC(dl, VT, N0,
3572                               DAG.getConstant(MinVal, dl, N0.getValueType()),
3573                               ISD::SETEQ);
3574       }
3575     }
3576 
3577     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
3578       if (C1 == MaxVal)
3579         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
3580 
3581       // TODO: Support this for vectors after legalize ops.
3582       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3583         // Canonicalize setgt X, Min --> setne X, Min
3584         if (C1 == MinVal)
3585           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3586 
3587         // If we have setugt X, Max-1, turn it into seteq X, Max
3588         if (C1 == MaxVal-1)
3589           return DAG.getSetCC(dl, VT, N0,
3590                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
3591                               ISD::SETEQ);
3592       }
3593     }
3594 
3595     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
3596       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
3597       if (C1.isNullValue())
3598         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
3599                 VT, N0, N1, Cond, DCI, dl))
3600           return CC;
3601     }
3602 
3603     // If we have "setcc X, C0", check to see if we can shrink the immediate
3604     // by changing cc.
3605     // TODO: Support this for vectors after legalize ops.
3606     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3607       // SETUGT X, SINTMAX  -> SETLT X, 0
3608       if (Cond == ISD::SETUGT &&
3609           C1 == APInt::getSignedMaxValue(OperandBitSize))
3610         return DAG.getSetCC(dl, VT, N0,
3611                             DAG.getConstant(0, dl, N1.getValueType()),
3612                             ISD::SETLT);
3613 
3614       // SETULT X, SINTMIN  -> SETGT X, -1
3615       if (Cond == ISD::SETULT &&
3616           C1 == APInt::getSignedMinValue(OperandBitSize)) {
3617         SDValue ConstMinusOne =
3618             DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
3619                             N1.getValueType());
3620         return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
3621       }
3622     }
3623   }
3624 
3625   // Back to non-vector simplifications.
3626   // TODO: Can we do these for vector splats?
3627   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3628     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3629     const APInt &C1 = N1C->getAPIntValue();
3630     EVT ShValTy = N0.getValueType();
3631 
3632     // Fold bit comparisons when we can.
3633     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3634         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
3635         N0.getOpcode() == ISD::AND) {
3636       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3637         EVT ShiftTy =
3638             getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
3639         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
3640           // Perform the xform if the AND RHS is a single bit.
3641           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
3642           if (AndRHS->getAPIntValue().isPowerOf2() &&
3643               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
3644             return DAG.getNode(ISD::TRUNCATE, dl, VT,
3645                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3646                                            DAG.getConstant(ShCt, dl, ShiftTy)));
3647           }
3648         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
3649           // (X & 8) == 8  -->  (X & 8) >> 3
3650           // Perform the xform if C1 is a single bit.
3651           unsigned ShCt = C1.logBase2();
3652           if (C1.isPowerOf2() &&
3653               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
3654             return DAG.getNode(ISD::TRUNCATE, dl, VT,
3655                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3656                                            DAG.getConstant(ShCt, dl, ShiftTy)));
3657           }
3658         }
3659       }
3660     }
3661 
3662     if (C1.getMinSignedBits() <= 64 &&
3663         !isLegalICmpImmediate(C1.getSExtValue())) {
3664       EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
3665       // (X & -256) == 256 -> (X >> 8) == 1
3666       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3667           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
3668         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3669           const APInt &AndRHSC = AndRHS->getAPIntValue();
3670           if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
3671             unsigned ShiftBits = AndRHSC.countTrailingZeros();
3672             if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
3673               SDValue Shift =
3674                 DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
3675                             DAG.getConstant(ShiftBits, dl, ShiftTy));
3676               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
3677               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
3678             }
3679           }
3680         }
3681       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
3682                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
3683         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
3684         // X <  0x100000000 -> (X >> 32) <  1
3685         // X >= 0x100000000 -> (X >> 32) >= 1
3686         // X <= 0x0ffffffff -> (X >> 32) <  1
3687         // X >  0x0ffffffff -> (X >> 32) >= 1
3688         unsigned ShiftBits;
3689         APInt NewC = C1;
3690         ISD::CondCode NewCond = Cond;
3691         if (AdjOne) {
3692           ShiftBits = C1.countTrailingOnes();
3693           NewC = NewC + 1;
3694           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3695         } else {
3696           ShiftBits = C1.countTrailingZeros();
3697         }
3698         NewC.lshrInPlace(ShiftBits);
3699         if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
3700             isLegalICmpImmediate(NewC.getSExtValue()) &&
3701             !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
3702           SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3703                                       DAG.getConstant(ShiftBits, dl, ShiftTy));
3704           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
3705           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
3706         }
3707       }
3708     }
3709   }
3710 
3711   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
3712     auto *CFP = cast<ConstantFPSDNode>(N1);
3713     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
3714 
3715     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
3716     // constant if knowing that the operand is non-nan is enough.  We prefer to
3717     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
3718     // materialize 0.0.
3719     if (Cond == ISD::SETO || Cond == ISD::SETUO)
3720       return DAG.getSetCC(dl, VT, N0, N0, Cond);
3721 
3722     // setcc (fneg x), C -> setcc swap(pred) x, -C
3723     if (N0.getOpcode() == ISD::FNEG) {
3724       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
3725       if (DCI.isBeforeLegalizeOps() ||
3726           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
3727         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
3728         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
3729       }
3730     }
3731 
3732     // If the condition is not legal, see if we can find an equivalent one
3733     // which is legal.
3734     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
3735       // If the comparison was an awkward floating-point == or != and one of
3736       // the comparison operands is infinity or negative infinity, convert the
3737       // condition to a less-awkward <= or >=.
3738       if (CFP->getValueAPF().isInfinity()) {
3739         if (CFP->getValueAPF().isNegative()) {
3740           if (Cond == ISD::SETOEQ &&
3741               isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
3742             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
3743           if (Cond == ISD::SETUEQ &&
3744               isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
3745             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
3746           if (Cond == ISD::SETUNE &&
3747               isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
3748             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
3749           if (Cond == ISD::SETONE &&
3750               isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
3751             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
3752         } else {
3753           if (Cond == ISD::SETOEQ &&
3754               isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
3755             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
3756           if (Cond == ISD::SETUEQ &&
3757               isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
3758             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
3759           if (Cond == ISD::SETUNE &&
3760               isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
3761             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
3762           if (Cond == ISD::SETONE &&
3763               isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
3764             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
3765         }
3766       }
3767     }
3768   }
3769 
3770   if (N0 == N1) {
3771     // The sext(setcc()) => setcc() optimization relies on the appropriate
3772     // constant being emitted.
3773     assert(!N0.getValueType().isInteger() &&
3774            "Integer types should be handled by FoldSetCC");
3775 
3776     bool EqTrue = ISD::isTrueWhenEqual(Cond);
3777     unsigned UOF = ISD::getUnorderedFlavor(Cond);
3778     if (UOF == 2) // FP operators that are undefined on NaNs.
3779       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
3780     if (UOF == unsigned(EqTrue))
3781       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
3782     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
3783     // if it is not already.
3784     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
3785     if (NewCond != Cond &&
3786         (DCI.isBeforeLegalizeOps() ||
3787                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
3788       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
3789   }
3790 
3791   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3792       N0.getValueType().isInteger()) {
3793     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
3794         N0.getOpcode() == ISD::XOR) {
3795       // Simplify (X+Y) == (X+Z) -->  Y == Z
3796       if (N0.getOpcode() == N1.getOpcode()) {
3797         if (N0.getOperand(0) == N1.getOperand(0))
3798           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
3799         if (N0.getOperand(1) == N1.getOperand(1))
3800           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
3801         if (isCommutativeBinOp(N0.getOpcode())) {
3802           // If X op Y == Y op X, try other combinations.
3803           if (N0.getOperand(0) == N1.getOperand(1))
3804             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
3805                                 Cond);
3806           if (N0.getOperand(1) == N1.getOperand(0))
3807             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
3808                                 Cond);
3809         }
3810       }
3811 
3812       // If RHS is a legal immediate value for a compare instruction, we need
3813       // to be careful about increasing register pressure needlessly.
3814       bool LegalRHSImm = false;
3815 
3816       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
3817         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3818           // Turn (X+C1) == C2 --> X == C2-C1
3819           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
3820             return DAG.getSetCC(dl, VT, N0.getOperand(0),
3821                                 DAG.getConstant(RHSC->getAPIntValue()-
3822                                                 LHSR->getAPIntValue(),
3823                                 dl, N0.getValueType()), Cond);
3824           }
3825 
3826           // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
3827           if (N0.getOpcode() == ISD::XOR)
3828             // If we know that all of the inverted bits are zero, don't bother
3829             // performing the inversion.
3830             if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
3831               return
3832                 DAG.getSetCC(dl, VT, N0.getOperand(0),
3833                              DAG.getConstant(LHSR->getAPIntValue() ^
3834                                                RHSC->getAPIntValue(),
3835                                              dl, N0.getValueType()),
3836                              Cond);
3837         }
3838 
3839         // Turn (C1-X) == C2 --> X == C1-C2
3840         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
3841           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
3842             return
3843               DAG.getSetCC(dl, VT, N0.getOperand(1),
3844                            DAG.getConstant(SUBC->getAPIntValue() -
3845                                              RHSC->getAPIntValue(),
3846                                            dl, N0.getValueType()),
3847                            Cond);
3848           }
3849         }
3850 
3851         // Could RHSC fold directly into a compare?
3852         if (RHSC->getValueType(0).getSizeInBits() <= 64)
3853           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
3854       }
3855 
3856       // (X+Y) == X --> Y == 0 and similar folds.
3857       // Don't do this if X is an immediate that can fold into a cmp
3858       // instruction and X+Y has other uses. It could be an induction variable
3859       // chain, and the transform would increase register pressure.
3860       if (!LegalRHSImm || N0.hasOneUse())
3861         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
3862           return V;
3863     }
3864 
3865     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
3866         N1.getOpcode() == ISD::XOR)
3867       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
3868         return V;
3869 
3870     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
3871       return V;
3872   }
3873 
3874   // Fold remainder of division by a constant.
3875   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
3876       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3877     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3878 
3879     // When division is cheap or optimizing for minimum size,
3880     // fall through to DIVREM creation by skipping this fold.
3881     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
3882       if (N0.getOpcode() == ISD::UREM) {
3883         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
3884           return Folded;
3885       } else if (N0.getOpcode() == ISD::SREM) {
3886         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
3887           return Folded;
3888       }
3889     }
3890   }
3891 
3892   // Fold away ALL boolean setcc's.
3893   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
3894     SDValue Temp;
3895     switch (Cond) {
3896     default: llvm_unreachable("Unknown integer setcc!");
3897     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
3898       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
3899       N0 = DAG.getNOT(dl, Temp, OpVT);
3900       if (!DCI.isCalledByLegalizer())
3901         DCI.AddToWorklist(Temp.getNode());
3902       break;
3903     case ISD::SETNE:  // X != Y   -->  (X^Y)
3904       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
3905       break;
3906     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
3907     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
3908       Temp = DAG.getNOT(dl, N0, OpVT);
3909       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
3910       if (!DCI.isCalledByLegalizer())
3911         DCI.AddToWorklist(Temp.getNode());
3912       break;
3913     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
3914     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
3915       Temp = DAG.getNOT(dl, N1, OpVT);
3916       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
3917       if (!DCI.isCalledByLegalizer())
3918         DCI.AddToWorklist(Temp.getNode());
3919       break;
3920     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
3921     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
3922       Temp = DAG.getNOT(dl, N0, OpVT);
3923       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
3924       if (!DCI.isCalledByLegalizer())
3925         DCI.AddToWorklist(Temp.getNode());
3926       break;
3927     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
3928     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
3929       Temp = DAG.getNOT(dl, N1, OpVT);
3930       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
3931       break;
3932     }
3933     if (VT.getScalarType() != MVT::i1) {
3934       if (!DCI.isCalledByLegalizer())
3935         DCI.AddToWorklist(N0.getNode());
3936       // FIXME: If running after legalize, we probably can't do this.
3937       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
3938       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
3939     }
3940     return N0;
3941   }
3942 
3943   // Could not fold it.
3944   return SDValue();
3945 }
3946 
3947 /// Returns true (and the GlobalValue and the offset) if the node is a
3948 /// GlobalAddress + offset.
3949 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
3950                                     int64_t &Offset) const {
3951 
3952   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
3953 
3954   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
3955     GA = GASD->getGlobal();
3956     Offset += GASD->getOffset();
3957     return true;
3958   }
3959 
3960   if (N->getOpcode() == ISD::ADD) {
3961     SDValue N1 = N->getOperand(0);
3962     SDValue N2 = N->getOperand(1);
3963     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
3964       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
3965         Offset += V->getSExtValue();
3966         return true;
3967       }
3968     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
3969       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
3970         Offset += V->getSExtValue();
3971         return true;
3972       }
3973     }
3974   }
3975 
3976   return false;
3977 }
3978 
/// Target hook invoked by the DAG combiner. The base implementation performs
/// no combining; the empty SDValue signals that the node was left unchanged
/// (the same convention SimplifySetCC uses for "could not fold it").
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
3984 
3985 //===----------------------------------------------------------------------===//
3986 //  Inline Assembler Implementation Methods
3987 //===----------------------------------------------------------------------===//
3988 
3989 TargetLowering::ConstraintType
3990 TargetLowering::getConstraintType(StringRef Constraint) const {
3991   unsigned S = Constraint.size();
3992 
3993   if (S == 1) {
3994     switch (Constraint[0]) {
3995     default: break;
3996     case 'r':
3997       return C_RegisterClass;
3998     case 'm': // memory
3999     case 'o': // offsetable
4000     case 'V': // not offsetable
4001       return C_Memory;
4002     case 'n': // Simple Integer
4003     case 'E': // Floating Point Constant
4004     case 'F': // Floating Point Constant
4005       return C_Immediate;
4006     case 'i': // Simple Integer or Relocatable Constant
4007     case 's': // Relocatable Constant
4008     case 'p': // Address.
4009     case 'X': // Allow ANY value.
4010     case 'I': // Target registers.
4011     case 'J':
4012     case 'K':
4013     case 'L':
4014     case 'M':
4015     case 'N':
4016     case 'O':
4017     case 'P':
4018     case '<':
4019     case '>':
4020       return C_Other;
4021     }
4022   }
4023 
4024   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
4025     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
4026       return C_Memory;
4027     return C_Register;
4028   }
4029   return C_Unknown;
4030 }
4031 
4032 /// Try to replace an X constraint, which matches anything, with another that
4033 /// has more specific requirements based on the type of the corresponding
4034 /// operand.
4035 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4036   if (ConstraintVT.isInteger())
4037     return "r";
4038   if (ConstraintVT.isFloatingPoint())
4039     return "f"; // works for many targets
4040   return nullptr;
4041 }
4042 
/// Target hook for lowering an inline-asm output operand for a constraint.
/// The base implementation handles nothing; the empty SDValue tells the
/// caller that no target-specific lowering was performed.
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
    SelectionDAG &DAG) const {
  return SDValue();
}
4048 
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter generic constraints are handled here; anything longer
  // is left for a target override to deal with.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':     // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock ||
        Op.getOpcode() == ISD::TargetBlockAddress) {
      Ops.push_back(Op);
      return;
    }
    LLVM_FALLTHROUGH;
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    GlobalAddressSDNode *GA;
    ConstantSDNode *C;
    BlockAddressSDNode *BA;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (1) {
      // 'n' takes only plain integers, so reject symbolic operands for it;
      // 's' takes only relocatable values, so reject plain integers for it.
      if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0),
                                                 Offset + GA->getOffset()));
        return;
      } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
                 ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
                                      : ISD::SIGN_EXTEND;
        int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
                                                    : C->getSExtValue();
        Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
                                            SDLoc(C), MVT::i64));
        return;
      } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
                 ConstraintLetter != 'n') {
        Ops.push_back(DAG.getTargetBlockAddress(
            BA->getBlockAddress(), BA->getValueType(0),
            Offset + BA->getOffset(), BA->getTargetFlags()));
        return;
      } else {
        // Peel one constant off an ADD/SUB, accumulate it into Offset, and
        // keep walking toward the symbol.
        const unsigned OpCode = Op.getOpcode();
        if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
          if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
            Op = Op.getOperand(1);
          // Subtraction is not commutative.
          else if (OpCode == ISD::ADD &&
                   (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
            Op = Op.getOperand(0);
          else
            return;
          // NOTE(review): for SUB this matches (C - X) and folds it as
          // X + (-C), while (X - C) with the constant on the right is not
          // matched at all — confirm that is the intended handling.
          Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
          continue;
        }
      }
      return;
    }
    break;
  }
  }
}
4129 
4130 std::pair<unsigned, const TargetRegisterClass *>
4131 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
4132                                              StringRef Constraint,
4133                                              MVT VT) const {
4134   if (Constraint.empty() || Constraint[0] != '{')
4135     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
4136   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
4137 
4138   // Remove the braces from around the name.
4139   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
4140 
4141   std::pair<unsigned, const TargetRegisterClass *> R =
4142       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
4143 
4144   // Figure out which register class contains this reg.
4145   for (const TargetRegisterClass *RC : RI->regclasses()) {
4146     // If none of the value types for this register class are valid, we
4147     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
4148     if (!isLegalRC(*RI, *RC))
4149       continue;
4150 
4151     for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
4152          I != E; ++I) {
4153       if (RegName.equals_lower(RI->getRegAsmName(*I))) {
4154         std::pair<unsigned, const TargetRegisterClass *> S =
4155             std::make_pair(*I, RC);
4156 
4157         // If this register class has the requested value type, return it,
4158         // otherwise keep searching and return the first class found
4159         // if no other is found which explicitly has the requested type.
4160         if (RI->isTypeLegalForClass(*RC, VT))
4161           return S;
4162         if (!R.second)
4163           R = S;
4164       }
4165     }
4166   }
4167 
4168   return R;
4169 }
4170 
4171 //===----------------------------------------------------------------------===//
4172 // Constraint Selection.
4173 
4174 /// Return true of this is an input operand that is a matching constraint like
4175 /// "4".
4176 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
4177   assert(!ConstraintCode.empty() && "No known constraint!");
4178   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
4179 }
4180 
4181 /// If this is an input matching constraint, this method returns the output
4182 /// operand it matches.
4183 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
4184   assert(!ConstraintCode.empty() && "No known constraint!");
4185   return atoi(ConstraintCode.c_str());
4186 }
4187 
4188 /// Split up the constraint string from the inline assembly value into the
4189 /// specific constraints and their prefixes, and also tie in the associated
4190 /// operand values.
4191 /// If this returns an empty vector, and if the constraint string itself
4192 /// isn't empty, there was an error parsing.
4193 TargetLowering::AsmOperandInfoVector
4194 TargetLowering::ParseConstraints(const DataLayout &DL,
4195                                  const TargetRegisterInfo *TRI,
4196                                  ImmutableCallSite CS) const {
4197   /// Information about all of the constraints.
4198   AsmOperandInfoVector ConstraintOperands;
4199   const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
4200   unsigned maCount = 0; // Largest number of multiple alternative constraints.
4201 
4202   // Do a prepass over the constraints, canonicalizing them, and building up the
4203   // ConstraintOperands list.
4204   unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
4205   unsigned ResNo = 0; // ResNo - The result number of the next output.
4206 
4207   for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
4208     ConstraintOperands.emplace_back(std::move(CI));
4209     AsmOperandInfo &OpInfo = ConstraintOperands.back();
4210 
4211     // Update multiple alternative constraint count.
4212     if (OpInfo.multipleAlternatives.size() > maCount)
4213       maCount = OpInfo.multipleAlternatives.size();
4214 
4215     OpInfo.ConstraintVT = MVT::Other;
4216 
4217     // Compute the value type for each operand.
4218     switch (OpInfo.Type) {
4219     case InlineAsm::isOutput:
4220       // Indirect outputs just consume an argument.
4221       if (OpInfo.isIndirect) {
4222         OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
4223         break;
4224       }
4225 
4226       // The return value of the call is this value.  As such, there is no
4227       // corresponding argument.
4228       assert(!CS.getType()->isVoidTy() &&
4229              "Bad inline asm!");
4230       if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
4231         OpInfo.ConstraintVT =
4232             getSimpleValueType(DL, STy->getElementType(ResNo));
4233       } else {
4234         assert(ResNo == 0 && "Asm only has one result!");
4235         OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
4236       }
4237       ++ResNo;
4238       break;
4239     case InlineAsm::isInput:
4240       OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
4241       break;
4242     case InlineAsm::isClobber:
4243       // Nothing to do.
4244       break;
4245     }
4246 
4247     if (OpInfo.CallOperandVal) {
4248       llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
4249       if (OpInfo.isIndirect) {
4250         llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
4251         if (!PtrTy)
4252           report_fatal_error("Indirect operand for inline asm not a pointer!");
4253         OpTy = PtrTy->getElementType();
4254       }
4255 
4256       // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
4257       if (StructType *STy = dyn_cast<StructType>(OpTy))
4258         if (STy->getNumElements() == 1)
4259           OpTy = STy->getElementType(0);
4260 
4261       // If OpTy is not a single value, it may be a struct/union that we
4262       // can tile with integers.
4263       if (!OpTy->isSingleValueType() && OpTy->isSized()) {
4264         unsigned BitSize = DL.getTypeSizeInBits(OpTy);
4265         switch (BitSize) {
4266         default: break;
4267         case 1:
4268         case 8:
4269         case 16:
4270         case 32:
4271         case 64:
4272         case 128:
4273           OpInfo.ConstraintVT =
4274               MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
4275           break;
4276         }
4277       } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
4278         unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
4279         OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
4280       } else {
4281         OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
4282       }
4283     }
4284   }
4285 
4286   // If we have multiple alternative constraints, select the best alternative.
4287   if (!ConstraintOperands.empty()) {
4288     if (maCount) {
4289       unsigned bestMAIndex = 0;
4290       int bestWeight = -1;
4291       // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
4292       int weight = -1;
4293       unsigned maIndex;
4294       // Compute the sums of the weights for each alternative, keeping track
4295       // of the best (highest weight) one so far.
4296       for (maIndex = 0; maIndex < maCount; ++maIndex) {
4297         int weightSum = 0;
4298         for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4299              cIndex != eIndex; ++cIndex) {
4300           AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
4301           if (OpInfo.Type == InlineAsm::isClobber)
4302             continue;
4303 
4304           // If this is an output operand with a matching input operand,
4305           // look up the matching input. If their types mismatch, e.g. one
4306           // is an integer, the other is floating point, or their sizes are
4307           // different, flag it as an maCantMatch.
4308           if (OpInfo.hasMatchingInput()) {
4309             AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4310             if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4311               if ((OpInfo.ConstraintVT.isInteger() !=
4312                    Input.ConstraintVT.isInteger()) ||
4313                   (OpInfo.ConstraintVT.getSizeInBits() !=
4314                    Input.ConstraintVT.getSizeInBits())) {
4315                 weightSum = -1; // Can't match.
4316                 break;
4317               }
4318             }
4319           }
4320           weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
4321           if (weight == -1) {
4322             weightSum = -1;
4323             break;
4324           }
4325           weightSum += weight;
4326         }
4327         // Update best.
4328         if (weightSum > bestWeight) {
4329           bestWeight = weightSum;
4330           bestMAIndex = maIndex;
4331         }
4332       }
4333 
4334       // Now select chosen alternative in each constraint.
4335       for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4336            cIndex != eIndex; ++cIndex) {
4337         AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
4338         if (cInfo.Type == InlineAsm::isClobber)
4339           continue;
4340         cInfo.selectAlternative(bestMAIndex);
4341       }
4342     }
4343   }
4344 
4345   // Check and hook up tied operands, choose constraint code to use.
4346   for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4347        cIndex != eIndex; ++cIndex) {
4348     AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
4349 
4350     // If this is an output operand with a matching input operand, look up the
4351     // matching input. If their types mismatch, e.g. one is an integer, the
4352     // other is floating point, or their sizes are different, flag it as an
4353     // error.
4354     if (OpInfo.hasMatchingInput()) {
4355       AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4356 
4357       if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4358         std::pair<unsigned, const TargetRegisterClass *> MatchRC =
4359             getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
4360                                          OpInfo.ConstraintVT);
4361         std::pair<unsigned, const TargetRegisterClass *> InputRC =
4362             getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
4363                                          Input.ConstraintVT);
4364         if ((OpInfo.ConstraintVT.isInteger() !=
4365              Input.ConstraintVT.isInteger()) ||
4366             (MatchRC.second != InputRC.second)) {
4367           report_fatal_error("Unsupported asm: input constraint"
4368                              " with a matching output constraint of"
4369                              " incompatible type!");
4370         }
4371       }
4372     }
4373   }
4374 
4375   return ConstraintOperands;
4376 }
4377 
4378 /// Return an integer indicating how general CT is.
4379 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
4380   switch (CT) {
4381   case TargetLowering::C_Immediate:
4382   case TargetLowering::C_Other:
4383   case TargetLowering::C_Unknown:
4384     return 0;
4385   case TargetLowering::C_Register:
4386     return 1;
4387   case TargetLowering::C_RegisterClass:
4388     return 2;
4389   case TargetLowering::C_Memory:
4390     return 3;
4391   }
4392   llvm_unreachable("Invalid constraint type");
4393 }
4394 
4395 /// Examine constraint type and operand type and determine a weight value.
4396 /// This object must already have been set up with the operand type
4397 /// and the current alternative constraint selected.
4398 TargetLowering::ConstraintWeight
4399   TargetLowering::getMultipleConstraintMatchWeight(
4400     AsmOperandInfo &info, int maIndex) const {
4401   InlineAsm::ConstraintCodeVector *rCodes;
4402   if (maIndex >= (int)info.multipleAlternatives.size())
4403     rCodes = &info.Codes;
4404   else
4405     rCodes = &info.multipleAlternatives[maIndex].Codes;
4406   ConstraintWeight BestWeight = CW_Invalid;
4407 
4408   // Loop over the options, keeping track of the most general one.
4409   for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
4410     ConstraintWeight weight =
4411       getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
4412     if (weight > BestWeight)
4413       BestWeight = weight;
4414   }
4415 
4416   return BestWeight;
4417 }
4418 
4419 /// Examine constraint type and operand type and determine a weight value.
4420 /// This object must already have been set up with the operand type
4421 /// and the current alternative constraint selected.
4422 TargetLowering::ConstraintWeight
4423   TargetLowering::getSingleConstraintMatchWeight(
4424     AsmOperandInfo &info, const char *constraint) const {
4425   ConstraintWeight weight = CW_Invalid;
4426   Value *CallOperandVal = info.CallOperandVal;
4427     // If we don't have a value, we can't do a match,
4428     // but allow it at the lowest weight.
4429   if (!CallOperandVal)
4430     return CW_Default;
4431   // Look at the constraint type.
4432   switch (*constraint) {
4433     case 'i': // immediate integer.
4434     case 'n': // immediate integer with a known value.
4435       if (isa<ConstantInt>(CallOperandVal))
4436         weight = CW_Constant;
4437       break;
4438     case 's': // non-explicit intregal immediate.
4439       if (isa<GlobalValue>(CallOperandVal))
4440         weight = CW_Constant;
4441       break;
4442     case 'E': // immediate float if host format.
4443     case 'F': // immediate float.
4444       if (isa<ConstantFP>(CallOperandVal))
4445         weight = CW_Constant;
4446       break;
4447     case '<': // memory operand with autodecrement.
4448     case '>': // memory operand with autoincrement.
4449     case 'm': // memory operand.
4450     case 'o': // offsettable memory operand
4451     case 'V': // non-offsettable memory operand
4452       weight = CW_Memory;
4453       break;
4454     case 'r': // general register.
4455     case 'g': // general register, memory operand or immediate integer.
4456               // note: Clang converts "g" to "imr".
4457       if (CallOperandVal->getType()->isIntegerTy())
4458         weight = CW_Register;
4459       break;
4460     case 'X': // any operand.
4461   default:
4462     weight = CW_Default;
4463     break;
4464   }
4465   return weight;
4466 }
4467 
4468 /// If there are multiple different constraints that we could pick for this
4469 /// operand (e.g. "imr") try to pick the 'best' one.
4470 /// This is somewhat tricky: constraints fall into four classes:
4471 ///    Other         -> immediates and magic values
4472 ///    Register      -> one specific register
4473 ///    RegisterClass -> a group of regs
4474 ///    Memory        -> memory
4475 /// Ideally, we would pick the most specific constraint possible: if we have
4476 /// something that fits into a register, we would pick it.  The problem here
4477 /// is that if we have something that could either be in a register or in
4478 /// memory that use of the register could cause selection of *other*
4479 /// operands to fail: they might only succeed if we pick memory.  Because of
4480 /// this the heuristic we use is:
4481 ///
4482 ///  1) If there is an 'other' constraint, and if the operand is valid for
4483 ///     that constraint, use it.  This makes us take advantage of 'i'
4484 ///     constraints when available.
4485 ///  2) Otherwise, pick the most general constraint present.  This prefers
4486 ///     'm' over 'r', for example.
4487 ///
4488 static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
4489                              const TargetLowering &TLI,
4490                              SDValue Op, SelectionDAG *DAG) {
4491   assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
4492   unsigned BestIdx = 0;
4493   TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
4494   int BestGenerality = -1;
4495 
4496   // Loop over the options, keeping track of the most general one.
4497   for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
4498     TargetLowering::ConstraintType CType =
4499       TLI.getConstraintType(OpInfo.Codes[i]);
4500 
4501     // Indirect 'other' or 'immediate' constraints are not allowed.
4502     if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
4503                                CType == TargetLowering::C_Register ||
4504                                CType == TargetLowering::C_RegisterClass))
4505       continue;
4506 
4507     // If this is an 'other' or 'immediate' constraint, see if the operand is
4508     // valid for it. For example, on X86 we might have an 'rI' constraint. If
4509     // the operand is an integer in the range [0..31] we want to use I (saving a
4510     // load of a register), otherwise we must use 'r'.
4511     if ((CType == TargetLowering::C_Other ||
4512          CType == TargetLowering::C_Immediate) && Op.getNode()) {
4513       assert(OpInfo.Codes[i].size() == 1 &&
4514              "Unhandled multi-letter 'other' constraint");
4515       std::vector<SDValue> ResultOps;
4516       TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
4517                                        ResultOps, *DAG);
4518       if (!ResultOps.empty()) {
4519         BestType = CType;
4520         BestIdx = i;
4521         break;
4522       }
4523     }
4524 
4525     // Things with matching constraints can only be registers, per gcc
4526     // documentation.  This mainly affects "g" constraints.
4527     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
4528       continue;
4529 
4530     // This constraint letter is more general than the previous one, use it.
4531     int Generality = getConstraintGenerality(CType);
4532     if (Generality > BestGenerality) {
4533       BestType = CType;
4534       BestIdx = i;
4535       BestGenerality = Generality;
4536     }
4537   }
4538 
4539   OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
4540   OpInfo.ConstraintType = BestType;
4541 }
4542 
4543 /// Determines the constraint code and constraint type to use for the specific
4544 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
4545 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
4546                                             SDValue Op,
4547                                             SelectionDAG *DAG) const {
4548   assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
4549 
4550   // Single-letter constraints ('r') are very common.
4551   if (OpInfo.Codes.size() == 1) {
4552     OpInfo.ConstraintCode = OpInfo.Codes[0];
4553     OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4554   } else {
4555     ChooseConstraint(OpInfo, *this, Op, DAG);
4556   }
4557 
4558   // 'X' matches anything.
4559   if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
4560     // Labels and constants are handled elsewhere ('X' is the only thing
4561     // that matches labels).  For Functions, the type here is the type of
4562     // the result, which is not what we want to look at; leave them alone.
4563     Value *v = OpInfo.CallOperandVal;
4564     if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
4565       OpInfo.CallOperandVal = v;
4566       return;
4567     }
4568 
4569     if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
4570       return;
4571 
4572     // Otherwise, try to resolve it to something we know about by looking at
4573     // the actual operand type.
4574     if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
4575       OpInfo.ConstraintCode = Repl;
4576       OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4577     }
4578   }
4579 }
4580 
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
///
/// Since the division is known to be exact (zero remainder), x/d can be
/// computed as x * d^-1 (mod 2^W), preceded by an exact arithmetic
/// shift-right when d is even. Handles scalar and build-vector divisors;
/// returns a null SDValue if any divisor element is zero or non-constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per-element worker: derive the shift amount and the multiplicative
  // inverse for one constant divisor.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is undefined; reject so the caller bails out.
    if (C->isNullValue())
      return false;
    APInt Divisor = C->getAPIntValue();
    // Peel off the power-of-two part of the divisor; exactness lets it
    // become an exact arithmetic shift-right, leaving an odd divisor.
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    // Each iteration doubles the number of correct low-order bits, so this
    // converges quickly for the (now odd, hence invertible mod 2^W) divisor.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Vectors assemble build_vectors of the per-lane constants; scalars use
  // the single computed element.
  SDValue Shift, Factor;
  if (VT.isVector()) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else {
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
4641 
4642 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
4643                               SelectionDAG &DAG,
4644                               SmallVectorImpl<SDNode *> &Created) const {
4645   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4646   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4647   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
4648     return SDValue(N, 0); // Lower SDIV as SDIV
4649   return SDValue();
4650 }
4651 
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  // Per-lane constants for the signed magic-division sequence:
  //   MagicFactors - the magic multiplier m
  //   Factors      - numerator add/subtract factor (-1, 0 or +1)
  //   Shifts       - post-multiply arithmetic shift amount s
  //   ShiftMasks   - -1 to apply the sign-bit fixup, 0 to suppress it
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is undefined; reject so the caller bails out.
    if (C->isNullValue())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    APInt::ms magics = Divisor.magic();
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.m = 0;
      magics.s = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Vectors assemble build_vectors of the per-lane constants; scalars use
  // the single computed element.
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (VT.isVector()) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else {
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  SDValue Q;
  if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
                          : isOperationLegalOrCustom(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
  else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
                               : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
    SDValue LoHi =
        DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
    Q = SDValue(LoHi.getNode(), 1); // Result 1 is the high half of the product.
  } else
    return SDValue(); // No mulhs or equivalent.
  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
4760 
4761 /// Given an ISD::UDIV node expressing a divide by constant,
4762 /// return a DAG expression to select that will generate the same value by
4763 /// multiplying by a magic number.
4764 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
4765 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
4766                                   bool IsAfterLegalization,
4767                                   SmallVectorImpl<SDNode *> &Created) const {
4768   SDLoc dl(N);
4769   EVT VT = N->getValueType(0);
4770   EVT SVT = VT.getScalarType();
4771   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4772   EVT ShSVT = ShVT.getScalarType();
4773   unsigned EltBits = VT.getScalarSizeInBits();
4774 
4775   // Check to see if we can do this.
4776   // FIXME: We should be more aggressive here.
4777   if (!isTypeLegal(VT))
4778     return SDValue();
4779 
4780   bool UseNPQ = false;
4781   SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
4782 
4783   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
4784     if (C->isNullValue())
4785       return false;
4786     // FIXME: We should use a narrower constant when the upper
4787     // bits are known to be zero.
4788     APInt Divisor = C->getAPIntValue();
4789     APInt::mu magics = Divisor.magicu();
4790     unsigned PreShift = 0, PostShift = 0;
4791 
4792     // If the divisor is even, we can avoid using the expensive fixup by
4793     // shifting the divided value upfront.
4794     if (magics.a != 0 && !Divisor[0]) {
4795       PreShift = Divisor.countTrailingZeros();
4796       // Get magic number for the shifted divisor.
4797       magics = Divisor.lshr(PreShift).magicu(PreShift);
4798       assert(magics.a == 0 && "Should use cheap fixup now");
4799     }
4800 
4801     APInt Magic = magics.m;
4802 
4803     unsigned SelNPQ;
4804     if (magics.a == 0 || Divisor.isOneValue()) {
4805       assert(magics.s < Divisor.getBitWidth() &&
4806              "We shouldn't generate an undefined shift!");
4807       PostShift = magics.s;
4808       SelNPQ = false;
4809     } else {
4810       PostShift = magics.s - 1;
4811       SelNPQ = true;
4812     }
4813 
4814     PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
4815     MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
4816     NPQFactors.push_back(
4817         DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
4818                                : APInt::getNullValue(EltBits),
4819                         dl, SVT));
4820     PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
4821     UseNPQ |= SelNPQ;
4822     return true;
4823   };
4824 
4825   SDValue N0 = N->getOperand(0);
4826   SDValue N1 = N->getOperand(1);
4827 
4828   // Collect the shifts/magic values from each element.
4829   if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
4830     return SDValue();
4831 
4832   SDValue PreShift, PostShift, MagicFactor, NPQFactor;
4833   if (VT.isVector()) {
4834     PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
4835     MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
4836     NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
4837     PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
4838   } else {
4839     PreShift = PreShifts[0];
4840     MagicFactor = MagicFactors[0];
4841     PostShift = PostShifts[0];
4842   }
4843 
4844   SDValue Q = N0;
4845   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
4846   Created.push_back(Q.getNode());
4847 
4848   // FIXME: We should support doing a MUL in a wider type.
4849   auto GetMULHU = [&](SDValue X, SDValue Y) {
4850     if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
4851                             : isOperationLegalOrCustom(ISD::MULHU, VT))
4852       return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
4853     if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
4854                             : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
4855       SDValue LoHi =
4856           DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
4857       return SDValue(LoHi.getNode(), 1);
4858     }
4859     return SDValue(); // No mulhu or equivalent
4860   };
4861 
4862   // Multiply the numerator (operand 0) by the magic value.
4863   Q = GetMULHU(Q, MagicFactor);
4864   if (!Q)
4865     return SDValue();
4866 
4867   Created.push_back(Q.getNode());
4868 
4869   if (UseNPQ) {
4870     SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
4871     Created.push_back(NPQ.getNode());
4872 
4873     // For vectors we might have a mix of non-NPQ/NPQ paths, so use
4874     // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
4875     if (VT.isVector())
4876       NPQ = GetMULHU(NPQ, NPQFactor);
4877     else
4878       NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
4879 
4880     Created.push_back(NPQ.getNode());
4881 
4882     Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
4883     Created.push_back(Q.getNode());
4884   }
4885 
4886   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
4887   Created.push_back(Q.getNode());
4888 
4889   SDValue One = DAG.getConstant(1, dl, VT);
4890   SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
4891   return DAG.getSelect(dl, VT, IsOne, N0, Q);
4892 }
4893 
4894 /// If all values in Values that *don't* match the predicate are same 'splat'
4895 /// value, then replace all values with that splat value.
4896 /// Else, if AlternativeReplacement was provided, then replace all values that
4897 /// do match predicate with AlternativeReplacement value.
4898 static void
4899 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
4900                           std::function<bool(SDValue)> Predicate,
4901                           SDValue AlternativeReplacement = SDValue()) {
4902   SDValue Replacement;
4903   // Is there a value for which the Predicate does *NOT* match? What is it?
4904   auto SplatValue = llvm::find_if_not(Values, Predicate);
4905   if (SplatValue != Values.end()) {
4906     // Does Values consist only of SplatValue's and values matching Predicate?
4907     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
4908           return Value == *SplatValue || Predicate(Value);
4909         })) // Then we shall replace values matching predicate with SplatValue.
4910       Replacement = *SplatValue;
4911   }
4912   if (!Replacement) {
4913     // Oops, we did not find the "baseline" splat value.
4914     if (!AlternativeReplacement)
4915       return; // Nothing to do.
4916     // Let's replace with provided value then.
4917     Replacement = AlternativeReplacement;
4918   }
4919   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
4920 }
4921 
4922 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
4923 /// where the divisor is constant and the comparison target is zero,
4924 /// return a DAG expression that will generate the same comparison result
4925 /// using only multiplications, additions and shifts/rotations.
4926 /// Ref: "Hacker's Delight" 10-17.
4927 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
4928                                         SDValue CompTargetNode,
4929                                         ISD::CondCode Cond,
4930                                         DAGCombinerInfo &DCI,
4931                                         const SDLoc &DL) const {
4932   SmallVector<SDNode *, 5> Built;
4933   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
4934                                          DCI, DL, Built)) {
4935     for (SDNode *N : Built)
4936       DCI.AddToWorklist(N);
4937     return Folded;
4938   }
4939 
4940   return SDValue();
4941 }
4942 
4943 SDValue
4944 TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
4945                                   SDValue CompTargetNode, ISD::CondCode Cond,
4946                                   DAGCombinerInfo &DCI, const SDLoc &DL,
4947                                   SmallVectorImpl<SDNode *> &Created) const {
4948   // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
4949   // - D must be constant, with D = D0 * 2^K where D0 is odd
4950   // - P is the multiplicative inverse of D0 modulo 2^W
4951   // - Q = floor(((2^W) - 1) / D)
4952   // where W is the width of the common type of N and D.
4953   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4954          "Only applicable for (in)equality comparisons.");
4955 
4956   SelectionDAG &DAG = DCI.DAG;
4957 
4958   EVT VT = REMNode.getValueType();
4959   EVT SVT = VT.getScalarType();
4960   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4961   EVT ShSVT = ShVT.getScalarType();
4962 
4963   // If MUL is unavailable, we cannot proceed in any case.
4964   if (!isOperationLegalOrCustom(ISD::MUL, VT))
4965     return SDValue();
4966 
4967   bool ComparingWithAllZeros = true;
4968   bool AllComparisonsWithNonZerosAreTautological = true;
4969   bool HadTautologicalLanes = false;
4970   bool AllLanesAreTautological = true;
4971   bool HadEvenDivisor = false;
4972   bool AllDivisorsArePowerOfTwo = true;
4973   bool HadTautologicalInvertedLanes = false;
4974   SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
4975 
4976   auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
4977     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
4978     if (CDiv->isNullValue())
4979       return false;
4980 
4981     const APInt &D = CDiv->getAPIntValue();
4982     const APInt &Cmp = CCmp->getAPIntValue();
4983 
4984     ComparingWithAllZeros &= Cmp.isNullValue();
4985 
4986     // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
4987     // if C2 is not less than C1, the comparison is always false.
4988     // But we will only be able to produce the comparison that will give the
4989     // opposive tautological answer. So this lane would need to be fixed up.
4990     bool TautologicalInvertedLane = D.ule(Cmp);
4991     HadTautologicalInvertedLanes |= TautologicalInvertedLane;
4992 
4993     // If all lanes are tautological (either all divisors are ones, or divisor
4994     // is not greater than the constant we are comparing with),
4995     // we will prefer to avoid the fold.
4996     bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
4997     HadTautologicalLanes |= TautologicalLane;
4998     AllLanesAreTautological &= TautologicalLane;
4999 
5000     // If we are comparing with non-zero, we need'll need  to subtract said
5001     // comparison value from the LHS. But there is no point in doing that if
5002     // every lane where we are comparing with non-zero is tautological..
5003     if (!Cmp.isNullValue())
5004       AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
5005 
5006     // Decompose D into D0 * 2^K
5007     unsigned K = D.countTrailingZeros();
5008     assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
5009     APInt D0 = D.lshr(K);
5010 
5011     // D is even if it has trailing zeros.
5012     HadEvenDivisor |= (K != 0);
5013     // D is a power-of-two if D0 is one.
5014     // If all divisors are power-of-two, we will prefer to avoid the fold.
5015     AllDivisorsArePowerOfTwo &= D0.isOneValue();
5016 
5017     // P = inv(D0, 2^W)
5018     // 2^W requires W + 1 bits, so we have to extend and then truncate.
5019     unsigned W = D.getBitWidth();
5020     APInt P = D0.zext(W + 1)
5021                   .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
5022                   .trunc(W);
5023     assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
5024     assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
5025 
5026     // Q = floor((2^W - 1) u/ D)
5027     // R = ((2^W - 1) u% D)
5028     APInt Q, R;
5029     APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
5030 
5031     // If we are comparing with zero, then that comparison constant is okay,
5032     // else it may need to be one less than that.
5033     if (Cmp.ugt(R))
5034       Q -= 1;
5035 
5036     assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
5037            "We are expecting that K is always less than all-ones for ShSVT");
5038 
5039     // If the lane is tautological the result can be constant-folded.
5040     if (TautologicalLane) {
5041       // Set P and K amount to a bogus values so we can try to splat them.
5042       P = 0;
5043       K = -1;
5044       // And ensure that comparison constant is tautological,
5045       // it will always compare true/false.
5046       Q = -1;
5047     }
5048 
5049     PAmts.push_back(DAG.getConstant(P, DL, SVT));
5050     KAmts.push_back(
5051         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
5052     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
5053     return true;
5054   };
5055 
5056   SDValue N = REMNode.getOperand(0);
5057   SDValue D = REMNode.getOperand(1);
5058 
5059   // Collect the values from each element.
5060   if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
5061     return SDValue();
5062 
5063   // If all lanes are tautological, the result can be constant-folded.
5064   if (AllLanesAreTautological)
5065     return SDValue();
5066 
5067   // If this is a urem by a powers-of-two, avoid the fold since it can be
5068   // best implemented as a bit test.
5069   if (AllDivisorsArePowerOfTwo)
5070     return SDValue();
5071 
5072   SDValue PVal, KVal, QVal;
5073   if (VT.isVector()) {
5074     if (HadTautologicalLanes) {
5075       // Try to turn PAmts into a splat, since we don't care about the values
5076       // that are currently '0'. If we can't, just keep '0'`s.
5077       turnVectorIntoSplatVector(PAmts, isNullConstant);
5078       // Try to turn KAmts into a splat, since we don't care about the values
5079       // that are currently '-1'. If we can't, change them to '0'`s.
5080       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
5081                                 DAG.getConstant(0, DL, ShSVT));
5082     }
5083 
5084     PVal = DAG.getBuildVector(VT, DL, PAmts);
5085     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
5086     QVal = DAG.getBuildVector(VT, DL, QAmts);
5087   } else {
5088     PVal = PAmts[0];
5089     KVal = KAmts[0];
5090     QVal = QAmts[0];
5091   }
5092 
5093   if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
5094     if (!isOperationLegalOrCustom(ISD::SUB, VT))
5095       return SDValue(); // FIXME: Could/should use `ISD::ADD`?
5096     assert(CompTargetNode.getValueType() == N.getValueType() &&
5097            "Expecting that the types on LHS and RHS of comparisons match.");
5098     N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
5099   }
5100 
5101   // (mul N, P)
5102   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
5103   Created.push_back(Op0.getNode());
5104 
5105   // Rotate right only if any divisor was even. We avoid rotates for all-odd
5106   // divisors as a performance improvement, since rotating by 0 is a no-op.
5107   if (HadEvenDivisor) {
5108     // We need ROTR to do this.
5109     if (!isOperationLegalOrCustom(ISD::ROTR, VT))
5110       return SDValue();
5111     SDNodeFlags Flags;
5112     Flags.setExact(true);
5113     // UREM: (rotr (mul N, P), K)
5114     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
5115     Created.push_back(Op0.getNode());
5116   }
5117 
5118   // UREM: (setule/setugt (rotr (mul N, P), K), Q)
5119   SDValue NewCC =
5120       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
5121                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
5122   if (!HadTautologicalInvertedLanes)
5123     return NewCC;
5124 
5125   // If any lanes previously compared always-false, the NewCC will give
5126   // always-true result for them, so we need to fixup those lanes.
5127   // Or the other way around for inequality predicate.
5128   assert(VT.isVector() && "Can/should only get here for vectors.");
5129   Created.push_back(NewCC.getNode());
5130 
5131   // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
5132   // if C2 is not less than C1, the comparison is always false.
5133   // But we have produced the comparison that will give the
5134   // opposive tautological answer. So these lanes would need to be fixed up.
5135   SDValue TautologicalInvertedChannels =
5136       DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
5137   Created.push_back(TautologicalInvertedChannels.getNode());
5138 
5139   if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
5140     // If we have a vector select, let's replace the comparison results in the
5141     // affected lanes with the correct tautological result.
5142     SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
5143                                               DL, SETCCVT, SETCCVT);
5144     return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
5145                        Replacement, NewCC);
5146   }
5147 
5148   // Else, we can just invert the comparison result in the appropriate lanes.
5149   if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
5150     return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
5151                        TautologicalInvertedChannels);
5152 
5153   return SDValue(); // Don't know how to lower.
5154 }
5155 
5156 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
5157 /// where the divisor is constant and the comparison target is zero,
5158 /// return a DAG expression that will generate the same comparison result
5159 /// using only multiplications, additions and shifts/rotations.
5160 /// Ref: "Hacker's Delight" 10-17.
5161 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
5162                                         SDValue CompTargetNode,
5163                                         ISD::CondCode Cond,
5164                                         DAGCombinerInfo &DCI,
5165                                         const SDLoc &DL) const {
5166   SmallVector<SDNode *, 7> Built;
5167   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5168                                          DCI, DL, Built)) {
5169     assert(Built.size() <= 7 && "Max size prediction failed.");
5170     for (SDNode *N : Built)
5171       DCI.AddToWorklist(N);
5172     return Folded;
5173   }
5174 
5175   return SDValue();
5176 }
5177 
/// Build a (seteq/setne (srem N, D), 0) replacement that avoids the division,
/// using the multiplicative-inverse technique (see the derivation below).
/// Appends every node it creates to \p Created so the caller can add them to
/// the combiner worklist. Returns an empty SDValue when the fold does not
/// apply or a required operation is unavailable on this target.
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isNullValue())
    return SDValue();

  // Per-divisor facts accumulated by BuildSREMPattern across all lanes;
  // they decide whether the fold is profitable and which ops are needed.
  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  // Per-lane constants for the P (inverse), A (offset), K (rotate amount)
  // and Q (comparison bound) vectors.
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  // Computes the magic constants for one divisor lane; returns false to
  // abort the whole fold (only for a zero divisor, which is UB anyway).
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isNullValue())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`

    // Negating INT_MIN is a no-op, so such lanes survive as INT_MIN and get
    // special fixup handling after the main fold is built.
    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOneValue();
    AllDivisorsAreOnes &= D.isOneValue();

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOneValue();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOneValue()) {
      // Set P, A and K to a bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, AVal, KVal, QVal;
  if (VT.isVector()) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else {
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();

    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    SDNodeFlags Flags;
    Flags.setExact(true);
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // NOTE(review): ISD::SETEQ (and Cond) are ISD::CondCode enumerators, not
  // node opcodes, yet they are passed to the opcode-based legality query;
  // presumably ISD::SETCC was intended here — confirm against
  // isOperationLegalOrCustom / isCondCodeLegal semantics.
  if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isOperationLegalOrCustom(Cond, VT) ||
      !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  // NOTE(review): the VSELECT is built with result type VT, but its selected
  // operands (MaskedIsZero/Fold) have type SETCCVT; if those types ever
  // differ this looks inconsistent — confirm intended type.
  SDValue Blended =
      DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);

  return Blended;
}
5418 
5419 bool TargetLowering::
5420 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
5421   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
5422     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
5423                                 "be a constant integer");
5424     return true;
5425   }
5426 
5427   return false;
5428 }
5429 
/// Return a non-zero value if negating \p Op is free or cheap, 0 otherwise:
///   2 - negation strips an existing FNEG (strictly profitable),
///   1 - the negation can be absorbed by rewriting the expression.
/// Must agree exactly with getNegatedExpression(), which performs the
/// rewrite this function costs out.
char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
                                        bool LegalOperations, bool ForCodeSize,
                                        unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG)
    return 2;

  // Don't allow anything with multiple uses unless we know it is free.
  // (A free FP_EXTEND can be negated by negating its source instead.)
  EVT VT = Op.getValueType();
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
                           isFPExtFree(VT, Op.getOperand(0).getValueType())))
    return 0;

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return 0;

  switch (Op.getOpcode()) {
  case ISD::ConstantFP: {
    if (!LegalOperations)
      return 1;

    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    // (The bool result implicitly converts to the char 1/0 cost.)
    return isOperationLegal(ISD::ConstantFP, VT) ||
           isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                        ForCodeSize);
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      return 0;
    if (!LegalOperations)
      return 1;
    if (isOperationLegal(ISD::ConstantFP, VT) &&
        isOperationLegal(ISD::BUILD_VECTOR, VT))
      return 1;
    // Otherwise every (defined) element's negation must be a legal immediate.
    return llvm::all_of(Op->op_values(), [&](SDValue N) {
      return N.isUndef() ||
             isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                          ForCodeSize);
    });
  }
  case ISD::FADD:
    // -(A + B) == (-A) - B only when signed zeros can be ignored.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT))
      return 0;

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                                    ForCodeSize, Depth + 1))
      return V;
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
                              ForCodeSize, Depth + 1);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                                    ForCodeSize, Depth + 1))
      return V;

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        return 0;

    return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
                              ForCodeSize, Depth + 1);

  case ISD::FMA:
  case ISD::FMAD: {
    // Negating an FMA requires negating Z and exactly one of X/Y.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    if (!V2)
      return 0;

    // One of Op0/Op1 must be cheaply negatible, then select the cheapest.
    char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    char V01 = std::max(V0, V1);
    return V01 ? std::max(V01, V2) : 0;
  }

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    // These commute with negation: negate the (single FP) operand instead.
    return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                              ForCodeSize, Depth + 1);
  }

  return 0;
}
5545 
/// Return the negation of \p Op, performing the rewrite that
/// isNegatibleForFree() previously costed as free/cheap. The caller must
/// only invoke this when isNegatibleForFree() returned non-zero for the
/// same arguments; the switch below mirrors that function case-for-case.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOperations,
                                             bool ForCodeSize,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG)
    return Op.getOperand(0);

  assert(Depth <= SelectionDAG::MaxRecursionDepth &&
         "getNegatedExpression doesn't match isNegatibleForFree");
  const SDNodeFlags Flags = Op->getFlags();

  switch (Op.getOpcode()) {
  case ISD::ConstantFP: {
    // Negate the immediate directly.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::BUILD_VECTOR: {
    // Negate each constant element; undef lanes stay undef.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
    }
    return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
  }
  case ISD::FADD:
    assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
            Flags.hasNoSignedZeros()) &&
           "Expected NSZ fp-flag");

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
                           Depth + 1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         getNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth + 1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       getNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP =
            isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
                           Depth + 1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         getNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth + 1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(
        Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0),
        getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
                             ForCodeSize, Depth + 1),
        Flags);

  case ISD::FMA:
  case ISD::FMAD: {
    assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
            Flags.hasNoSignedZeros()) &&
           "Expected NSZ fp-flag");

    // The addend is always negated; then negate the cheaper of X and Y.
    SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
                                        ForCodeSize, Depth + 1);

    char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    // TODO: This is a hack. It is possible that costs have changed between now
    //       and the initial calls to isNegatibleForFree(). That is because we
    //       are rewriting the expression, and that may change the number of
    //       uses (and therefore the cost) of values. If the negation costs are
    //       equal, only negate this value if it is a constant. Otherwise, try
    //       operand 1. A better fix would eliminate uses as a cost factor or
    //       track the change in uses as we rewrite the expression.
    if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) {
      // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
      SDValue Neg0 = getNegatedExpression(
          Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
                         Op.getOperand(1), Neg2, Flags);
    }

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
                                        ForCodeSize, Depth + 1);
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0), Neg1, Neg2, Flags);
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes through these: push it onto the operand.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       getNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1));
  case ISD::FP_ROUND:
    // FP_ROUND carries a truncation-exactness flag operand to preserve.
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       getNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1),
                       Op.getOperand(1));
  }

  llvm_unreachable("Unknown code");
}
5676 
5677 //===----------------------------------------------------------------------===//
5678 // Legalization Utilities
5679 //===----------------------------------------------------------------------===//
5680 
5681 bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
5682                                     SDValue LHS, SDValue RHS,
5683                                     SmallVectorImpl<SDValue> &Result,
5684                                     EVT HiLoVT, SelectionDAG &DAG,
5685                                     MulExpansionKind Kind, SDValue LL,
5686                                     SDValue LH, SDValue RL, SDValue RH) const {
5687   assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
5688          Opcode == ISD::SMUL_LOHI);
5689 
5690   bool HasMULHS = (Kind == MulExpansionKind::Always) ||
5691                   isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
5692   bool HasMULHU = (Kind == MulExpansionKind::Always) ||
5693                   isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
5694   bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
5695                       isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
5696   bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
5697                       isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
5698 
5699   if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
5700     return false;
5701 
5702   unsigned OuterBitSize = VT.getScalarSizeInBits();
5703   unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
5704   unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
5705   unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
5706 
5707   // LL, LH, RL, and RH must be either all NULL or all set to a value.
5708   assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
5709          (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
5710 
5711   SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
5712   auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
5713                           bool Signed) -> bool {
5714     if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
5715       Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
5716       Hi = SDValue(Lo.getNode(), 1);
5717       return true;
5718     }
5719     if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
5720       Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
5721       Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
5722       return true;
5723     }
5724     return false;
5725   };
5726 
5727   SDValue Lo, Hi;
5728 
5729   if (!LL.getNode() && !RL.getNode() &&
5730       isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
5731     LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
5732     RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
5733   }
5734 
5735   if (!LL.getNode())
5736     return false;
5737 
5738   APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
5739   if (DAG.MaskedValueIsZero(LHS, HighMask) &&
5740       DAG.MaskedValueIsZero(RHS, HighMask)) {
5741     // The inputs are both zero-extended.
5742     if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
5743       Result.push_back(Lo);
5744       Result.push_back(Hi);
5745       if (Opcode != ISD::MUL) {
5746         SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
5747         Result.push_back(Zero);
5748         Result.push_back(Zero);
5749       }
5750       return true;
5751     }
5752   }
5753 
5754   if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
5755       RHSSB > InnerBitSize) {
5756     // The input values are both sign-extended.
5757     // TODO non-MUL case?
5758     if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
5759       Result.push_back(Lo);
5760       Result.push_back(Hi);
5761       return true;
5762     }
5763   }
5764 
5765   unsigned ShiftAmount = OuterBitSize - InnerBitSize;
5766   EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
5767   if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
5768     // FIXME getShiftAmountTy does not always return a sensible result when VT
5769     // is an illegal type, and so the type may be too small to fit the shift
5770     // amount. Override it with i32. The shift will have to be legalized.
5771     ShiftAmountTy = MVT::i32;
5772   }
5773   SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
5774 
5775   if (!LH.getNode() && !RH.getNode() &&
5776       isOperationLegalOrCustom(ISD::SRL, VT) &&
5777       isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
5778     LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
5779     LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
5780     RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
5781     RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
5782   }
5783 
5784   if (!LH.getNode())
5785     return false;
5786 
5787   if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
5788     return false;
5789 
5790   Result.push_back(Lo);
5791 
5792   if (Opcode == ISD::MUL) {
5793     RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
5794     LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
5795     Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
5796     Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
5797     Result.push_back(Hi);
5798     return true;
5799   }
5800 
5801   // Compute the full width result.
5802   auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
5803     Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
5804     Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
5805     Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
5806     return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
5807   };
5808 
5809   SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
5810   if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
5811     return false;
5812 
5813   // This is effectively the add part of a multiply-add of half-sized operands,
5814   // so it cannot overflow.
5815   Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
5816 
5817   if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
5818     return false;
5819 
5820   SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
5821   EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5822 
5823   bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
5824                   isOperationLegalOrCustom(ISD::ADDE, VT));
5825   if (UseGlue)
5826     Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
5827                        Merge(Lo, Hi));
5828   else
5829     Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
5830                        Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
5831 
5832   SDValue Carry = Next.getValue(1);
5833   Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5834   Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
5835 
5836   if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
5837     return false;
5838 
5839   if (UseGlue)
5840     Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
5841                      Carry);
5842   else
5843     Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
5844                      Zero, Carry);
5845 
5846   Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
5847 
5848   if (Opcode == ISD::SMUL_LOHI) {
5849     SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
5850                                   DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
5851     Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
5852 
5853     NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
5854                           DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
5855     Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
5856   }
5857 
5858   Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5859   Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
5860   Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5861   return true;
5862 }
5863 
5864 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
5865                                SelectionDAG &DAG, MulExpansionKind Kind,
5866                                SDValue LL, SDValue LH, SDValue RL,
5867                                SDValue RH) const {
5868   SmallVector<SDValue, 2> Result;
5869   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
5870                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
5871                            DAG, Kind, LL, LH, RL, RH);
5872   if (Ok) {
5873     assert(Result.size() == 2);
5874     Lo = Result[0];
5875     Hi = Result[1];
5876   }
5877   return Ok;
5878 }
5879 
5880 bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
5881                                        SelectionDAG &DAG) const {
5882   EVT VT = Node->getValueType(0);
5883 
5884   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
5885                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
5886                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
5887                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
5888     return false;
5889 
5890   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
5891   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
5892   SDValue X = Node->getOperand(0);
5893   SDValue Y = Node->getOperand(1);
5894   SDValue Z = Node->getOperand(2);
5895 
5896   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5897   bool IsFSHL = Node->getOpcode() == ISD::FSHL;
5898   SDLoc DL(SDValue(Node, 0));
5899 
5900   EVT ShVT = Z.getValueType();
5901   SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
5902   SDValue Zero = DAG.getConstant(0, DL, ShVT);
5903 
5904   SDValue ShAmt;
5905   if (isPowerOf2_32(EltSizeInBits)) {
5906     SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
5907     ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
5908   } else {
5909     ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
5910   }
5911 
5912   SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
5913   SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
5914   SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
5915   SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
5916 
5917   // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
5918   // and that is undefined. We must compare and select to avoid UB.
5919   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
5920 
5921   // For fshl, 0-shift returns the 1st arg (X).
5922   // For fshr, 0-shift returns the 2nd arg (Y).
5923   SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
5924   Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
5925   return true;
5926 }
5927 
5928 // TODO: Merge with expandFunnelShift.
5929 bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
5930                                SelectionDAG &DAG) const {
5931   EVT VT = Node->getValueType(0);
5932   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5933   bool IsLeft = Node->getOpcode() == ISD::ROTL;
5934   SDValue Op0 = Node->getOperand(0);
5935   SDValue Op1 = Node->getOperand(1);
5936   SDLoc DL(SDValue(Node, 0));
5937 
5938   EVT ShVT = Op1.getValueType();
5939   SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
5940 
5941   // If a rotate in the other direction is legal, use it.
5942   unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
5943   if (isOperationLegal(RevRot, VT)) {
5944     SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
5945     Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
5946     return true;
5947   }
5948 
5949   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
5950                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
5951                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
5952                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
5953                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
5954     return false;
5955 
5956   // Otherwise,
5957   //   (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
5958   //   (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
5959   //
5960   assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
5961          "Expecting the type bitwidth to be a power of 2");
5962   unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
5963   unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
5964   SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
5965   SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
5966   SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
5967   SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
5968   Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
5969                        DAG.getNode(HsOpc, DL, VT, Op0, And1));
5970   return true;
5971 }
5972 
/// Expand FP_TO_SINT by decoding the IEEE-754 bit pattern of the source with
/// integer arithmetic.  Currently limited to f32 -> i64; returns false when
/// the expansion does not apply.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  // Strict FP opcodes carry the chain as operand 0, so the value is operand 1.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision layout: sign in bit 31, exponent in bits
  // 30..23 (bias 127), mantissa in bits 22..0.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as its raw integer bits.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent: ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Arithmetic shift of the isolated sign bit produces 0 for non-negative
  // inputs and all-ones for negative inputs.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Recover the significand: the 23 mantissa bits plus the implicit leading
  // one (0x00800000).  (For denormals the leading one is wrong, but the
  // final exponent < 0 select below yields zero for them anyway.)
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand by the exponent: shift left when the exponent
  // exceeds the mantissa width (23), shift right otherwise.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones and is
  // a no-op when Sign is zero.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |Src| < 1, which truncates to zero.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
6043 
/// Expand [STRICT_]FP_TO_UINT in terms of [STRICT_]FP_TO_SINT, handling
/// values at or above the signed-integer boundary with an offset/XOR (strict)
/// or compare/select (non-strict) sequence.  For strict nodes, \p Chain is
/// set to the output chain of the emitted sequence.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // Strict FP opcodes carry the chain as operand 0, so the value is operand 1.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Cst is the FP value of the destination signmask (2^(DstBits-1)); Sel is
  // true when Src is strictly below it, i.e. when FP_TO_SINT alone suffices.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode())
    // A signaling comparison is required to match the trapping behavior of
    // the strict conversion being expanded.
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
  else
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      // Thread the chain through FSUB and FP_TO_SINT so their order is kept.
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Node->getOperand(0), Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    // XOR with the signmask re-adds the subtracted offset in the integer
    // domain (the converted value is non-negative in that case).
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
6134 
/// Expand [STRICT_]UINT_TO_FP from i64 sources to f32 or f64 without native
/// unsigned-conversion support, using the compiler-rt algorithms.  Returns
/// false for any other type combination.  For strict nodes, \p Chain is set
/// to the output chain of the emitted sequence.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  // Strict FP opcodes carry the chain as operand 0, so the value is operand 1.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  if (SrcVT.getScalarType() != MVT::i64)
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  if (DstVT.getScalarType() == MVT::f32) {
    // Only expand vector types if we have the appropriate vector bit
    // operations.
    if (SrcVT.isVector() &&
        (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
         !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
         !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
      return false;

    // For unsigned conversions, convert them to signed conversions using the
    // algorithm from the x86_64 __floatundisf in compiler_rt.

    // TODO: This really should be implemented using a branch rather than a
    // select.  We happen to get lucky and machinesink does the right
    // thing most of the time.  This would be a good candidate for a
    // pseudo-op, or, even better, for whole-function isel.
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

    // SignBitTest is true when bit 63 is set, i.e. when the value does not
    // fit in a signed i64 and the slow path must be taken.
    SDValue SignBitTest = DAG.getSetCC(
        dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);

    // Slow path: halve the value, OR'ing the discarded low bit back in so
    // the final rounding still observes it, convert signed, then double.
    SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
    SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
    SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
    SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
    SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);

    SDValue Slow, Fast;
    if (Node->isStrictFPOpcode()) {
      // In strict mode, we must avoid spurious exceptions, and therefore
      // must make sure to only emit a single STRICT_SINT_TO_FP.
      SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src);
      Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other },
                         { Node->getOperand(0), InCvt });
      Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
                         { Fast.getValue(1), Fast, Fast });
      Chain = Slow.getValue(1);
      // The STRICT_SINT_TO_FP inherits the exception mode from the
      // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can
      // never raise any exception.
      SDNodeFlags Flags;
      Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept());
      Fast->setFlags(Flags);
      Flags.setNoFPExcept(true);
      Slow->setFlags(Flags);
    } else {
      SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
      Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
      Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
    }

    Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
    return true;
  }

  if (DstVT.getScalarType() == MVT::f64) {
    // Only expand vector types if we have the appropriate vector bit
    // operations.
    if (SrcVT.isVector() &&
        (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
         !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
         !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
      return false;

    // Implementation of unsigned i64 to f64 following the algorithm in
    // __floatundidf in compiler_rt. This implementation has the advantage
    // of performing rounding correctly, both in the default rounding mode
    // and in all alternate rounding modes.
    // The integer constants below are the raw bit patterns of the doubles
    // 2^52 and 2^84; OR'ing the 32-bit halves of the input into them yields
    // the exact doubles (2^52 + Lo) and (2^84 + Hi * 2^32) after bitcast.
    SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
    SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
        BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
    SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
    SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
    SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

    SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
    SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
    SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
    SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
    SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
    SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
    // (2^84 + Hi*2^32) - (2^84 + 2^52) = Hi*2^32 - 2^52, computed exactly;
    // adding (2^52 + Lo) then produces Hi*2^32 + Lo with a single,
    // correctly-rounded FP addition.
    if (Node->isStrictFPOpcode()) {
      SDValue HiSub =
          DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
                      {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
      Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
                           {HiSub.getValue(1), LoFlt, HiSub});
      Chain = Result.getValue(1);
    } else {
      SDValue HiSub =
          DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
      Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
    }
    return true;
  }

  return false;
}
6252 
/// Expand FMINNUM/FMAXNUM in one of three ways, tried in order:
///  1. the IEEE variant (quieting possibly-signaling NaN inputs first),
///  2. FMINIMUM/FMAXIMUM when the node is marked no-NaNs,
///  3. a compare+select when the node is marked no-NaNs.
/// Returns a null SDValue if none of these apply.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
    ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
  EVT VT = Node->getValueType(0);
  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs.
  if (Node->getFlags().hasNoNaNs()) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
    }
  }

  // If none of the above worked, but there are no NaNs, then expand to
  // a compare/select sequence.  This is required for correctness since
  // InstCombine might have canonicalized a fcmp+select sequence to a
  // FMINNUM/FMAXNUM node.  If we were to fall through to the default
  // expansion to libcall, we might introduce a link-time dependency
  // on libm into a file that originally did not have one.
  if (Node->getFlags().hasNoNaNs()) {
    ISD::CondCode Pred =
        Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
    SDValue Op1 = Node->getOperand(0);
    SDValue Op2 = Node->getOperand(1);
    SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
    // Copy FMF flags, but always set the no-signed-zeros flag
    // as this is implied by the FMINNUM/FMAXNUM semantics.
    SDNodeFlags Flags = Node->getFlags();
    Flags.setNoSignedZeros(true);
    SelCC->setFlags(Flags);
    return SelCC;
  }

  // No expansion applies; the caller will have to use a different strategy
  // (e.g. a libcall).
  return SDValue();
}
6312 
/// Expand CTPOP with the parallel bit-summing algorithm ("best" method from
/// the Bit Twiddling Hacks collection).  Handles byte-multiple widths up to
/// 128 bits; returns false otherwise, or when a vector type lacks the
/// required building-block operations.
bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
                                 SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  // MUL is only needed for the final byte-summing step, which is skipped
  // when Len == 8.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return false;

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  // The 8-bit patterns below are splatted across the full width.
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
  SDValue Mask01 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);
  // v = (v * 0x01010101...) >> (Len - 8)
  // The multiply sums all of the per-byte counts into the top byte.
  if (Len > 8)
    Op =
        DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
                    DAG.getConstant(Len - 8, dl, ShVT));

  Result = Op;
  return true;
}
6372 
6373 bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
6374                                 SelectionDAG &DAG) const {
6375   SDLoc dl(Node);
6376   EVT VT = Node->getValueType(0);
6377   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6378   SDValue Op = Node->getOperand(0);
6379   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6380 
6381   // If the non-ZERO_UNDEF version is supported we can use that instead.
6382   if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
6383       isOperationLegalOrCustom(ISD::CTLZ, VT)) {
6384     Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
6385     return true;
6386   }
6387 
6388   // If the ZERO_UNDEF version is supported use that and handle the zero case.
6389   if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
6390     EVT SetCCVT =
6391         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6392     SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
6393     SDValue Zero = DAG.getConstant(0, dl, VT);
6394     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6395     Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6396                          DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
6397     return true;
6398   }
6399 
6400   // Only expand vector types if we have the appropriate vector bit operations.
6401   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6402                         !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
6403                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
6404                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
6405     return false;
6406 
6407   // for now, we do this:
6408   // x = x | (x >> 1);
6409   // x = x | (x >> 2);
6410   // ...
6411   // x = x | (x >>16);
6412   // x = x | (x >>32); // for 64-bit input
6413   // return popcount(~x);
6414   //
6415   // Ref: "Hacker's Delight" by Henry Warren
6416   for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
6417     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
6418     Op = DAG.getNode(ISD::OR, dl, VT, Op,
6419                      DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
6420   }
6421   Op = DAG.getNOT(dl, Op, VT);
6422   Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
6423   return true;
6424 }
6425 
/// Expand CTTZ/CTTZ_ZERO_UNDEF, preferring the variant the target already
/// supports, and otherwise falling back to the (~x & (x - 1)) trick combined
/// with CTPOP or CTLZ.
bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
                                SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT)) {
    Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
    return true;
  }

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  // A select pins the zero-input result to the bitwidth.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
    return true;
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // Either CTPOP or CTLZ suffices for the final step, hence the &&.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return false;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // Tmp has a set bit for every trailing zero of Op (and nothing else).
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    Result =
        DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                    DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
    return true;
  }

  Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
  return true;
}
6480 
6481 bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
6482                                SelectionDAG &DAG) const {
6483   SDLoc dl(N);
6484   EVT VT = N->getValueType(0);
6485   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6486   SDValue Op = N->getOperand(0);
6487 
6488   // Only expand vector types if we have the appropriate vector operations.
6489   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
6490                         !isOperationLegalOrCustom(ISD::ADD, VT) ||
6491                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6492     return false;
6493 
6494   SDValue Shift =
6495       DAG.getNode(ISD::SRA, dl, VT, Op,
6496                   DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
6497   SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
6498   Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
6499   return true;
6500 }
6501 
/// Scalarize a vector load into one (possibly extending) scalar load per
/// element followed by a BUILD_VECTOR. Returns the built vector value and a
/// TokenFactor merging the chains of all the component loads.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  ISD::LoadExtType ExtType = LD->getExtensionType();

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = LD->getValueType(0).getScalarType();

  // Per-element stride in bytes; elements must be byte-sized for the
  // byte-offset arithmetic below to be exact.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    // Each element load inherits the original extension type, memory flags
    // and AA info; alignment is the original's, reduced by the offset.
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    // Advance the base pointer to the next element.
    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // Merge the load chains so later memory operations order correctly
  // against all of the scalar loads.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);

  return std::make_pair(Value, NewChain);
}
6540 
/// Scalarize a vector store. Byte-sized elements are stored individually
/// with truncating stores; non-byte-sized elements are packed into a single
/// integer (respecting endianness) and stored as one integer store so that
/// the in-memory layout has no padding between elements. Returns the chain
/// of the resulting store(s).
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    // OR each zero-extended element into place, shifted to its bit position.
    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getConstant(Idx, SL, IdxVT));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 occupies the most significant bits,
      // so the shift index is reversed.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getAlignment(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getConstant(Idx, SL, IdxVT));

    SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
        ST->getMemOperand()->getFlags(), ST->getAAInfo());

    Stores.push_back(Store);
  }

  // Merge the store chains; the individual element stores are unordered
  // with respect to each other.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
6614 
/// Expand an unaligned load. Three strategies, in order of preference:
/// (1) FP/vector values whose integer-equivalent type is legal are loaded as
/// one misaligned integer load plus a bitcast; (2) otherwise FP/vector values
/// are copied register-by-register through an aligned stack slot; (3) scalar
/// integers are split into two half-width loads combined with SHL/OR.
/// Returns the loaded value and the output chain.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        // Re-apply the original extension if this was an extending load.
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up: the final (partial) register is handled separately below.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       MinAlign(LD->getAlignment(), Offset),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  unsigned Alignment = LD->getAlignment();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  The low part is always zero-extended; the
  // high part keeps the original extension so the combined result matches.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  }

  // aggregate the two parts: result = (Hi << NumBits) | Lo
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
6766 
/// Expand an unaligned store. Mirrors expandUnalignedLoad: FP/vector values
/// with a legal integer-equivalent type become a bitcast plus a misaligned
/// integer store; otherwise FP/vector values are staged through an aligned
/// stack slot and copied out register-by-register; scalar integers are split
/// into two half-width truncating stores. Returns the output chain.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  int Alignment = ST->getAlignment();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up: the final (partial) register is handled separately below.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    MinAlign(ST->getAlignment(), Offset),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          MinAlign(ST->getAlignment(), Offset),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  int NumBits = NewStoredVT.getSizeInBits();
  int IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount = DAG.getConstant(
      NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
  SDValue Lo = Val;
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts, choosing which half goes first by endianness.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
  Alignment = MinAlign(Alignment, IncrementSize);
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
6892 
6893 SDValue
6894 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
6895                                        const SDLoc &DL, EVT DataVT,
6896                                        SelectionDAG &DAG,
6897                                        bool IsCompressedMemory) const {
6898   SDValue Increment;
6899   EVT AddrVT = Addr.getValueType();
6900   EVT MaskVT = Mask.getValueType();
6901   assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
6902          "Incompatible types of Data and Mask");
6903   if (IsCompressedMemory) {
6904     // Incrementing the pointer according to number of '1's in the mask.
6905     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
6906     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
6907     if (MaskIntVT.getSizeInBits() < 32) {
6908       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
6909       MaskIntVT = MVT::i32;
6910     }
6911 
6912     // Count '1's with POPCNT.
6913     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
6914     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
6915     // Scale is an element size in bytes.
6916     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
6917                                     AddrVT);
6918     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
6919   } else
6920     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
6921 
6922   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
6923 }
6924 
6925 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
6926                                        SDValue Idx,
6927                                        EVT VecVT,
6928                                        const SDLoc &dl) {
6929   if (isa<ConstantSDNode>(Idx))
6930     return Idx;
6931 
6932   EVT IdxVT = Idx.getValueType();
6933   unsigned NElts = VecVT.getVectorNumElements();
6934   if (isPowerOf2_32(NElts)) {
6935     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
6936                                      Log2_32(NElts));
6937     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
6938                        DAG.getConstant(Imm, dl, IdxVT));
6939   }
6940 
6941   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
6942                      DAG.getConstant(NElts - 1, dl, IdxVT));
6943 }
6944 
6945 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
6946                                                 SDValue VecPtr, EVT VecVT,
6947                                                 SDValue Index) const {
6948   SDLoc dl(Index);
6949   // Make sure the index type is big enough to compute in.
6950   Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
6951 
6952   EVT EltVT = VecVT.getVectorElementType();
6953 
6954   // Calculate the element offset and add it to the pointer.
6955   unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
6956   assert(EltSize * 8 == EltVT.getSizeInBits() &&
6957          "Converting bits to bytes lost precision");
6958 
6959   Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
6960 
6961   EVT IdxVT = Index.getValueType();
6962 
6963   Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
6964                       DAG.getConstant(EltSize, dl, IdxVT));
6965   return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
6966 }
6967 
6968 //===----------------------------------------------------------------------===//
6969 // Implementation of Emulated TLS Model
6970 //===----------------------------------------------------------------------===//
6971 
/// Lower an access to a thread-local global under the emulated TLS model.
/// Access to the address of TLS variable xyz is lowered to a function call:
///   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
/// Returns the pointer produced by the call.
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  ArgListTy Args;
  ArgListEntry Entry;
  // Look up the "__emutls_v.<name>" control variable that the frontend is
  // expected to have created in the same module.
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  // Any offset would have to be added after the __emutls_get_address call,
  // which this lowering does not do.
  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
7008 
7009 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
7010                                                 SelectionDAG &DAG) const {
7011   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
7012   if (!isCtlzFast())
7013     return SDValue();
7014   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7015   SDLoc dl(Op);
7016   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7017     if (C->isNullValue() && CC == ISD::SETEQ) {
7018       EVT VT = Op.getOperand(0).getValueType();
7019       SDValue Zext = Op.getOperand(0);
7020       if (VT.bitsLT(MVT::i32)) {
7021         VT = MVT::i32;
7022         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
7023       }
7024       unsigned Log2b = Log2_32(VT.getSizeInBits());
7025       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
7026       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
7027                                 DAG.getConstant(Log2b, dl, MVT::i32));
7028       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
7029     }
7030   }
7031   return SDValue();
7032 }
7033 
/// Expand a saturating add/sub node ([SU]ADDSAT / [SU]SUBSAT) into simpler
/// operations: either a min/max-based form when available, or the matching
/// overflow-reporting arithmetic node plus a select of the saturation value.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b (clamps a so the add cannot wrap).
  if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map the saturating opcode to its overflow-reporting counterpart.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
                               LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Overflow is already an all-ones/zero mask, so OR it in directly:
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  } else if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  } else {
    // Signed case: on overflow the wrapped result's sign tells which bound
    // was crossed.
    // SatMax -> Overflow && SumDiff < 0
    // SatMin -> Overflow && SumDiff >= 0
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
    SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
    SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
    SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
    Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
    return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
  }
}
7113 
/// Expand a fixed point multiplication ([US]MULFIX or [US]MULFIXSAT) into
/// target-legal nodes: form the double-width product via [SU]MUL_LOHI or
/// MULH[SU], funnel-shift the two halves right by the scale, and, for the
/// saturating variants, clamp the result when the discarded high bits show
/// overflow. Returns SDValue() for vector types the target cannot multiply
/// (so the caller can unroll) and reports a fatal error for scalars with no
/// usable multiply.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  // Operand 2 is the (constant) number of fractional bits.
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  // With no fractional bits the operation degenerates to a plain (possibly
  // saturating) multiply; try cheaper legal nodes first.
  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // smul.fix.sat(a, b, 0) via SMULO: on overflow, a negative wrapped
      // product means the true product was positive (saturate to max) and
      // vice versa.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // umul.fix.sat(a, b, 0) via UMULO: any overflow saturates to the
      // unsigned maximum.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    // No combined lo/hi node: build the halves with separate MUL and MULH
    // nodes.
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    // Signal the caller to unroll the vector operation instead.
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  // NOTE: still reachable with Scale == 0 when none of the fast paths at the
  // top of the function applied (e.g. SMULO was not legal).
  if (Scale == 0) {
    // Overflow iff Hi differs from the sign-extension of Lo's sign bit.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
  // which is the same as if (Hi < (-1 << (Scale - 1)))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
7253 
7254 void TargetLowering::expandUADDSUBO(
7255     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
7256   SDLoc dl(Node);
7257   SDValue LHS = Node->getOperand(0);
7258   SDValue RHS = Node->getOperand(1);
7259   bool IsAdd = Node->getOpcode() == ISD::UADDO;
7260 
7261   // If ADD/SUBCARRY is legal, use that instead.
7262   unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
7263   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
7264     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
7265     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
7266                                     { LHS, RHS, CarryIn });
7267     Result = SDValue(NodeCarry.getNode(), 0);
7268     Overflow = SDValue(NodeCarry.getNode(), 1);
7269     return;
7270   }
7271 
7272   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
7273                             LHS.getValueType(), LHS, RHS);
7274 
7275   EVT ResultType = Node->getValueType(1);
7276   EVT SetCCType = getSetCCResultType(
7277       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
7278   ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
7279   SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
7280   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
7281 }
7282 
7283 void TargetLowering::expandSADDSUBO(
7284     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
7285   SDLoc dl(Node);
7286   SDValue LHS = Node->getOperand(0);
7287   SDValue RHS = Node->getOperand(1);
7288   bool IsAdd = Node->getOpcode() == ISD::SADDO;
7289 
7290   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
7291                             LHS.getValueType(), LHS, RHS);
7292 
7293   EVT ResultType = Node->getValueType(1);
7294   EVT OType = getSetCCResultType(
7295       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
7296 
7297   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
7298   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
7299   if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
7300     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
7301     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
7302     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
7303     return;
7304   }
7305 
7306   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
7307 
7308   // For an addition, the result should be less than one of the operands (LHS)
7309   // if and only if the other operand (RHS) is negative, otherwise there will
7310   // be overflow.
7311   // For a subtraction, the result should be less than one of the operands
7312   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
7313   // otherwise there will be overflow.
7314   SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
7315   SDValue ConditionRHS =
7316       DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
7317 
7318   Overflow = DAG.getBoolExtOrTrunc(
7319       DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
7320       ResultType, ResultType);
7321 }
7322 
/// Expand SMULO/UMULO into target-legal nodes, writing the (wrapping) product
/// and the overflow flag into \p Result and \p Overflow. Strategies tried, in
/// order: shift form for power-of-two constant RHS, MULH[SU], [SU]MUL_LOHI, a
/// widened MUL on a legal double-width type, and finally a double-width
/// multiply libcall (scalars only). Returns false only for vector types with
/// none of the multiply forms available.
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting back (arithmetic for signed, logical for
      // unsigned) does not reproduce the original LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // Double-width type used to hold the full product.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                              VT.getVectorNumElements());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Opcode triples indexed by signedness: { high-half mul, lo/hi pair mul,
  // extension used for the widening fallback }.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Widen both operands, multiply, and split the product back into halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of low
      // part.
      unsigned LoSize = VT.getSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
        // Unsigned: the high halves are simply zero.
        HiLHS = DAG.getConstant(0, dl, VT);
        HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus has to be split to
    // two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half differs from the sign-extension of
    // the bottom half's sign bit.
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff any bit of the top half is set.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
7466 
7467 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
7468   SDLoc dl(Node);
7469   bool NoNaN = Node->getFlags().hasNoNaNs();
7470   unsigned BaseOpcode = 0;
7471   switch (Node->getOpcode()) {
7472   default: llvm_unreachable("Expected VECREDUCE opcode");
7473   case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
7474   case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
7475   case ISD::VECREDUCE_ADD:  BaseOpcode = ISD::ADD; break;
7476   case ISD::VECREDUCE_MUL:  BaseOpcode = ISD::MUL; break;
7477   case ISD::VECREDUCE_AND:  BaseOpcode = ISD::AND; break;
7478   case ISD::VECREDUCE_OR:   BaseOpcode = ISD::OR; break;
7479   case ISD::VECREDUCE_XOR:  BaseOpcode = ISD::XOR; break;
7480   case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
7481   case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
7482   case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
7483   case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
7484   case ISD::VECREDUCE_FMAX:
7485     BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
7486     break;
7487   case ISD::VECREDUCE_FMIN:
7488     BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
7489     break;
7490   }
7491 
7492   SDValue Op = Node->getOperand(0);
7493   EVT VT = Op.getValueType();
7494 
7495   // Try to use a shuffle reduction for power of two vectors.
7496   if (VT.isPow2VectorType()) {
7497     while (VT.getVectorNumElements() > 1) {
7498       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
7499       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
7500         break;
7501 
7502       SDValue Lo, Hi;
7503       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
7504       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
7505       VT = HalfVT;
7506     }
7507   }
7508 
7509   EVT EltVT = VT.getVectorElementType();
7510   unsigned NumElts = VT.getVectorNumElements();
7511 
7512   SmallVector<SDValue, 8> Ops;
7513   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
7514 
7515   SDValue Res = Ops[0];
7516   for (unsigned i = 1; i < NumElts; i++)
7517     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
7518 
7519   // Result type may be wider than element type.
7520   if (EltVT != Node->getValueType(0))
7521     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
7522   return Res;
7523 }
7524