1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/CodeGen/CallingConvLower.h"
16 #include "llvm/CodeGen/MachineFrameInfo.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineJumpTableInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/SelectionDAG.h"
21 #include "llvm/CodeGen/TargetRegisterInfo.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalVariable.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCExpr.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/KnownBits.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetLoweringObjectFile.h"
33 #include "llvm/Target/TargetMachine.h"
34 #include <cctype>
35 using namespace llvm;
36 
/// NOTE: The TargetMachine owns TLOF.
/// All lowering state lives in the TargetLoweringBase subobject; this class
/// only layers SelectionDAG-specific helpers on top of it.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
40 
41 const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
42   return nullptr;
43 }
44 
45 bool TargetLowering::isPositionIndependent() const {
46   return getTargetMachine().isPositionIndependent();
47 }
48 
49 /// Check whether a given call node is in tail position within its function. If
50 /// so, it sets Chain to the input chain of the tail call.
51 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
52                                           SDValue &Chain) const {
53   const Function &F = DAG.getMachineFunction().getFunction();
54 
55   // Conservatively require the attributes of the call to match those of
56   // the return. Ignore NoAlias and NonNull because they don't affect the
57   // call sequence.
58   AttributeList CallerAttrs = F.getAttributes();
59   if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
60           .removeAttribute(Attribute::NoAlias)
61           .removeAttribute(Attribute::NonNull)
62           .hasAttributes())
63     return false;
64 
65   // It's not safe to eliminate the sign / zero extension of the return value.
66   if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
67       CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
68     return false;
69 
70   // Check if the only use is a function return node.
71   return isUsedByReturnOnly(Node, Chain);
72 }
73 
74 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
75     const uint32_t *CallerPreservedMask,
76     const SmallVectorImpl<CCValAssign> &ArgLocs,
77     const SmallVectorImpl<SDValue> &OutVals) const {
78   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
79     const CCValAssign &ArgLoc = ArgLocs[I];
80     if (!ArgLoc.isRegLoc())
81       continue;
82     Register Reg = ArgLoc.getLocReg();
83     // Only look at callee saved registers.
84     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
85       continue;
86     // Check that we pass the value used for the caller.
87     // (We look for a CopyFromReg reading a virtual register that is used
88     //  for the function live-in value of register Reg)
89     SDValue Value = OutVals[I];
90     if (Value->getOpcode() != ISD::CopyFromReg)
91       return false;
92     unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
93     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
94       return false;
95   }
96   return true;
97 }
98 
99 /// Set CallLoweringInfo attribute flags based on a call instruction
100 /// and called function attributes.
101 void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
102                                                      unsigned ArgIdx) {
103   IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
104   IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
105   IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
106   IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
107   IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
108   IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
109   IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
110   IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
111   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
112   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
113   Alignment = Call->getParamAlignment(ArgIdx);
114   ByValType = nullptr;
115   if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
116     ByValType = Call->getParamByValType(ArgIdx);
117 }
118 
119 /// Generate a libcall taking the given operands as arguments and returning a
120 /// result of type RetVT.
121 std::pair<SDValue, SDValue>
122 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
123                             ArrayRef<SDValue> Ops,
124                             MakeLibCallOptions CallOptions,
125                             const SDLoc &dl,
126                             SDValue InChain) const {
127   if (!InChain)
128     InChain = DAG.getEntryNode();
129 
130   TargetLowering::ArgListTy Args;
131   Args.reserve(Ops.size());
132 
133   TargetLowering::ArgListEntry Entry;
134   for (unsigned i = 0; i < Ops.size(); ++i) {
135     SDValue NewOp = Ops[i];
136     Entry.Node = NewOp;
137     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
138     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
139                                                  CallOptions.IsSExt);
140     Entry.IsZExt = !Entry.IsSExt;
141 
142     if (CallOptions.IsSoften &&
143         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
144       Entry.IsSExt = Entry.IsZExt = false;
145     }
146     Args.push_back(Entry);
147   }
148 
149   if (LC == RTLIB::UNKNOWN_LIBCALL)
150     report_fatal_error("Unsupported library call operation!");
151   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
152                                          getPointerTy(DAG.getDataLayout()));
153 
154   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
155   TargetLowering::CallLoweringInfo CLI(DAG);
156   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
157   bool zeroExtend = !signExtend;
158 
159   if (CallOptions.IsSoften &&
160       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
161     signExtend = zeroExtend = false;
162   }
163 
164   CLI.setDebugLoc(dl)
165       .setChain(InChain)
166       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
167       .setNoReturn(CallOptions.DoesNotReturn)
168       .setDiscardResult(!CallOptions.IsReturnValueUsed)
169       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
170       .setSExtResult(signExtend)
171       .setZExtResult(zeroExtend);
172   return LowerCallTo(CLI);
173 }
174 
/// Determine the sequence of value types to use when lowering a memory
/// operation (memset/memcpy) of 'Size' bytes, appending one EVT per store to
/// MemOps. Returns false if the alignment precondition fails or more than
/// 'Limit' operations would be needed.
bool
TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                         unsigned Limit, uint64_t Size,
                                         unsigned DstAlign, unsigned SrcAlign,
                                         bool IsMemset,
                                         bool ZeroMemset,
                                         bool MemcpyStrSrc,
                                         bool AllowOverlap,
                                         unsigned DstAS, unsigned SrcAS,
                                         const AttributeList &FuncAttributes) const {
  // If 'SrcAlign' is zero, that means the memory operation does not need to
  // load the value, i.e. memset or memcpy from constant string. Otherwise,
  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
  // is the specified alignment of the memory operation. If it is zero, that
  // means it's possible to change the alignment of the destination.
  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
  // not need to be loaded.
  if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
    return false;

  // Let the target pick a preferred type for the bulk of the copy.
  EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
                               IsMemset, ZeroMemset, MemcpyStrSrc,
                               FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    // Walk down from i64 through the MVT integer types (SimpleTy - 1 is the
    // next-smaller integer MVT) until the alignment constraint is met or
    // misaligned access is allowed.
    VT = MVT::i64;
    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
           !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily emit operations of the current width, shrinking VT whenever the
  // remaining tail is smaller than one operation of that width.
  unsigned NumMemOps = 0;
  while (Size != 0) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        // Fall back from vector/FP types to a plain integer type first.
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Step down through the integer MVTs until a safe one is found;
        // i8 is the unconditional floor.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
                                         MachineMemOperand::MONone, &Fast) &&
          Fast)
        // Keep the wide VT and let the final op overlap the previous one.
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
276 
277 /// Soften the operands of a comparison. This code is shared among BR_CC,
278 /// SELECT_CC, and SETCC handlers.
279 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
280                                          SDValue &NewLHS, SDValue &NewRHS,
281                                          ISD::CondCode &CCCode,
282                                          const SDLoc &dl, const SDValue OldLHS,
283                                          const SDValue OldRHS) const {
284   assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
285          && "Unsupported setcc type!");
286 
287   // Expand into one or more soft-fp libcall(s).
288   RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
289   bool ShouldInvertCC = false;
290   switch (CCCode) {
291   case ISD::SETEQ:
292   case ISD::SETOEQ:
293     LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
294           (VT == MVT::f64) ? RTLIB::OEQ_F64 :
295           (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
296     break;
297   case ISD::SETNE:
298   case ISD::SETUNE:
299     LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
300           (VT == MVT::f64) ? RTLIB::UNE_F64 :
301           (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
302     break;
303   case ISD::SETGE:
304   case ISD::SETOGE:
305     LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
306           (VT == MVT::f64) ? RTLIB::OGE_F64 :
307           (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
308     break;
309   case ISD::SETLT:
310   case ISD::SETOLT:
311     LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
312           (VT == MVT::f64) ? RTLIB::OLT_F64 :
313           (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
314     break;
315   case ISD::SETLE:
316   case ISD::SETOLE:
317     LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
318           (VT == MVT::f64) ? RTLIB::OLE_F64 :
319           (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
320     break;
321   case ISD::SETGT:
322   case ISD::SETOGT:
323     LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
324           (VT == MVT::f64) ? RTLIB::OGT_F64 :
325           (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
326     break;
327   case ISD::SETUO:
328     LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
329           (VT == MVT::f64) ? RTLIB::UO_F64 :
330           (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
331     break;
332   case ISD::SETO:
333     LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
334           (VT == MVT::f64) ? RTLIB::O_F64 :
335           (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
336     break;
337   case ISD::SETONE:
338     // SETONE = SETOLT | SETOGT
339     LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
340           (VT == MVT::f64) ? RTLIB::OLT_F64 :
341           (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
342     LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
343           (VT == MVT::f64) ? RTLIB::OGT_F64 :
344           (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
345     break;
346   case ISD::SETUEQ:
347     LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
348           (VT == MVT::f64) ? RTLIB::UO_F64 :
349           (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
350     LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
351           (VT == MVT::f64) ? RTLIB::OEQ_F64 :
352           (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
353     break;
354   default:
355     // Invert CC for unordered comparisons
356     ShouldInvertCC = true;
357     switch (CCCode) {
358     case ISD::SETULT:
359       LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
360             (VT == MVT::f64) ? RTLIB::OGE_F64 :
361             (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
362       break;
363     case ISD::SETULE:
364       LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
365             (VT == MVT::f64) ? RTLIB::OGT_F64 :
366             (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
367       break;
368     case ISD::SETUGT:
369       LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
370             (VT == MVT::f64) ? RTLIB::OLE_F64 :
371             (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
372       break;
373     case ISD::SETUGE:
374       LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
375             (VT == MVT::f64) ? RTLIB::OLT_F64 :
376             (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
377       break;
378     default: llvm_unreachable("Do not know how to soften this setcc!");
379     }
380   }
381 
382   // Use the target specific return value for comparions lib calls.
383   EVT RetVT = getCmpLibcallReturnType();
384   SDValue Ops[2] = {NewLHS, NewRHS};
385   TargetLowering::MakeLibCallOptions CallOptions;
386   EVT OpsVT[2] = { OldLHS.getValueType(),
387                    OldRHS.getValueType() };
388   CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
389   NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
390   NewRHS = DAG.getConstant(0, dl, RetVT);
391 
392   CCCode = getCmpLibcallCC(LC1);
393   if (ShouldInvertCC) {
394     assert(RetVT.isInteger());
395     CCCode = getSetCCInverse(CCCode, RetVT);
396   }
397 
398   if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
399     SDValue Tmp = DAG.getNode(
400         ISD::SETCC, dl,
401         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
402         NewLHS, NewRHS, DAG.getCondCode(CCCode));
403     NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
404     NewLHS = DAG.getNode(
405         ISD::SETCC, dl,
406         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
407         NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
408     NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
409     NewRHS = SDValue();
410   }
411 }
412 
413 /// Return the entry encoding for a jump table in the current function. The
414 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
415 unsigned TargetLowering::getJumpTableEncoding() const {
416   // In non-pic modes, just use the address of a block.
417   if (!isPositionIndependent())
418     return MachineJumpTableInfo::EK_BlockAddress;
419 
420   // In PIC mode, if the target supports a GPRel32 directive, use it.
421   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
422     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
423 
424   // Otherwise, use a label difference.
425   return MachineJumpTableInfo::EK_LabelDifference32;
426 }
427 
428 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
429                                                  SelectionDAG &DAG) const {
430   // If our PIC model is GP relative, use the global offset table as the base.
431   unsigned JTEncoding = getJumpTableEncoding();
432 
433   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
434       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
435     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
436 
437   return Table;
438 }
439 
440 /// This returns the relocation base for the given PIC jumptable, the same as
441 /// getPICJumpTableRelocBase, but as an MCExpr.
442 const MCExpr *
443 TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
444                                              unsigned JTI,MCContext &Ctx) const{
445   // The normal PIC reloc base is the label at the start of the jump table.
446   return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
447 }
448 
449 bool
450 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
451   const TargetMachine &TM = getTargetMachine();
452   const GlobalValue *GV = GA->getGlobal();
453 
454   // If the address is not even local to this DSO we will have to load it from
455   // a got and then add the offset.
456   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
457     return false;
458 
459   // If the code is position independent we will have to add a base register.
460   if (isPositionIndependent())
461     return false;
462 
463   // Otherwise we can do it.
464   return true;
465 }
466 
467 //===----------------------------------------------------------------------===//
468 //  Optimization Methods
469 //===----------------------------------------------------------------------===//
470 
471 /// If the specified instruction has a constant integer operand and there are
472 /// bits set in that constant that are not demanded, then clear those bits and
473 /// return true.
474 bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
475                                             TargetLoweringOpt &TLO) const {
476   SDLoc DL(Op);
477   unsigned Opcode = Op.getOpcode();
478 
479   // Do target-specific constant optimization.
480   if (targetShrinkDemandedConstant(Op, Demanded, TLO))
481     return TLO.New.getNode();
482 
483   // FIXME: ISD::SELECT, ISD::SELECT_CC
484   switch (Opcode) {
485   default:
486     break;
487   case ISD::XOR:
488   case ISD::AND:
489   case ISD::OR: {
490     auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
491     if (!Op1C)
492       return false;
493 
494     // If this is a 'not' op, don't touch it because that's a canonical form.
495     const APInt &C = Op1C->getAPIntValue();
496     if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
497       return false;
498 
499     if (!C.isSubsetOf(Demanded)) {
500       EVT VT = Op.getValueType();
501       SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
502       SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
503       return TLO.CombineTo(Op, NewOp);
504     }
505 
506     break;
507   }
508   }
509 
510   return false;
511 }
512 
513 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
514 /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
515 /// generalized for targets with other types of implicit widening casts.
516 bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
517                                       const APInt &Demanded,
518                                       TargetLoweringOpt &TLO) const {
519   assert(Op.getNumOperands() == 2 &&
520          "ShrinkDemandedOp only supports binary operators!");
521   assert(Op.getNode()->getNumValues() == 1 &&
522          "ShrinkDemandedOp only supports nodes with one result!");
523 
524   SelectionDAG &DAG = TLO.DAG;
525   SDLoc dl(Op);
526 
527   // Early return, as this function cannot handle vector types.
528   if (Op.getValueType().isVector())
529     return false;
530 
531   // Don't do this if the node has another user, which may require the
532   // full value.
533   if (!Op.getNode()->hasOneUse())
534     return false;
535 
536   // Search for the smallest integer type with free casts to and from
537   // Op's type. For expedience, just check power-of-2 integer types.
538   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
539   unsigned DemandedSize = Demanded.getActiveBits();
540   unsigned SmallVTBits = DemandedSize;
541   if (!isPowerOf2_32(SmallVTBits))
542     SmallVTBits = NextPowerOf2(SmallVTBits);
543   for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
544     EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
545     if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
546         TLI.isZExtFree(SmallVT, Op.getValueType())) {
547       // We found a type with free casts.
548       SDValue X = DAG.getNode(
549           Op.getOpcode(), dl, SmallVT,
550           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
551           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
552       assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
553       SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
554       return TLO.CombineTo(Op, Z);
555     }
556   }
557   return false;
558 }
559 
560 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
561                                           DAGCombinerInfo &DCI) const {
562   SelectionDAG &DAG = DCI.DAG;
563   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
564                         !DCI.isBeforeLegalizeOps());
565   KnownBits Known;
566 
567   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
568   if (Simplified) {
569     DCI.AddToWorklist(Op.getNode());
570     DCI.CommitTargetLoweringOpt(TLO);
571   }
572   return Simplified;
573 }
574 
575 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
576                                           KnownBits &Known,
577                                           TargetLoweringOpt &TLO,
578                                           unsigned Depth,
579                                           bool AssumeSingleUse) const {
580   EVT VT = Op.getValueType();
581   APInt DemandedElts = VT.isVector()
582                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
583                            : APInt(1, 1);
584   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
585                               AssumeSingleUse);
586 }
587 
// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
/// Look through Op for an existing value that is equivalent to Op given that
/// only DemandedBits/DemandedElts of the result are used. Unlike
/// SimplifyDemandedBits this never mutates the DAG; it returns an existing
/// SDValue (or an empty one when no simplification is found), so it is safe
/// to use on nodes with multiple uses.
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(Op.getValueType());

  unsigned NumElts = DemandedElts.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();

    // Same element width: the demanded masks carry over unchanged.
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    // TODO - bigendian once we have test coverage.
    // Narrow src elements -> wide dst elements: each dst element covers
    // 'Scale' consecutive src elements; translate the demanded bits of each
    // dst chunk onto the corresponding src element.
    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned Offset = i * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
        if (!Sub.isNullValue()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    // Wide src elements -> narrow dst elements: 'Scale' dst elements map into
    // one src element; place each demanded dst element's bits at its offset
    // within the covering src element.
    if ((NumSrcEltBits % NumDstEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.  These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
      return Op.getOperand(0);
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // If we don't demand the inserted element, return the base vector.
    // Requires a constant, in-bounds insertion index to reason about.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      // Undef mask entries and undemanded lanes constrain nothing.
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // Let targets handle their own opcodes.
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
751 
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in TLO.Old and TLO.New. Otherwise, analyze the
/// expression and return a mask of Known bits for the expression (used to
/// simplify the caller). The Known bits may only be accurate for those bits in
/// the OriginalDemandedBits and OriginalDemandedElts.
759 bool TargetLowering::SimplifyDemandedBits(
760     SDValue Op, const APInt &OriginalDemandedBits,
761     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
762     unsigned Depth, bool AssumeSingleUse) const {
763   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
764   assert(Op.getScalarValueSizeInBits() == BitWidth &&
765          "Mask size mismatches value type size!");
766 
767   unsigned NumElts = OriginalDemandedElts.getBitWidth();
768   assert((!Op.getValueType().isVector() ||
769           NumElts == Op.getValueType().getVectorNumElements()) &&
770          "Unexpected vector size");
771 
772   APInt DemandedBits = OriginalDemandedBits;
773   APInt DemandedElts = OriginalDemandedElts;
774   SDLoc dl(Op);
775   auto &DL = TLO.DAG.getDataLayout();
776 
777   // Don't know anything.
778   Known = KnownBits(BitWidth);
779 
780   // Undef operand.
781   if (Op.isUndef())
782     return false;
783 
784   if (Op.getOpcode() == ISD::Constant) {
785     // We know all of the bits for a constant!
786     Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
787     Known.Zero = ~Known.One;
788     return false;
789   }
790 
791   // Other users may use these bits.
792   EVT VT = Op.getValueType();
793   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
794     if (Depth != 0) {
795       // If not at the root, Just compute the Known bits to
796       // simplify things downstream.
797       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
798       return false;
799     }
800     // If this is the root being simplified, allow it to have multiple uses,
801     // just set the DemandedBits/Elts to all bits.
802     DemandedBits = APInt::getAllOnesValue(BitWidth);
803     DemandedElts = APInt::getAllOnesValue(NumElts);
804   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
805     // Not demanding any bits/elts from Op.
806     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
807   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
808     // Limit search depth.
809     return false;
810   }
811 
812   KnownBits Known2, KnownOut;
813   switch (Op.getOpcode()) {
814   case ISD::TargetConstant:
815     llvm_unreachable("Can't simplify this node");
816   case ISD::SCALAR_TO_VECTOR: {
817     if (!DemandedElts[0])
818       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
819 
820     KnownBits SrcKnown;
821     SDValue Src = Op.getOperand(0);
822     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
823     APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
824     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
825       return true;
826     Known = SrcKnown.zextOrTrunc(BitWidth, false);
827     break;
828   }
829   case ISD::BUILD_VECTOR:
830     // Collect the known bits that are shared by every demanded element.
831     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
832     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
833     return false; // Don't fall through, will infinitely loop.
834   case ISD::LOAD: {
835     LoadSDNode *LD = cast<LoadSDNode>(Op);
836     if (getTargetConstantFromLoad(LD)) {
837       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
838       return false; // Don't fall through, will infinitely loop.
839     }
840     break;
841   }
842   case ISD::INSERT_VECTOR_ELT: {
843     SDValue Vec = Op.getOperand(0);
844     SDValue Scl = Op.getOperand(1);
845     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
846     EVT VecVT = Vec.getValueType();
847 
848     // If index isn't constant, assume we need all vector elements AND the
849     // inserted element.
850     APInt DemandedVecElts(DemandedElts);
851     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
852       unsigned Idx = CIdx->getZExtValue();
853       DemandedVecElts.clearBit(Idx);
854 
855       // Inserted element is not required.
856       if (!DemandedElts[Idx])
857         return TLO.CombineTo(Op, Vec);
858     }
859 
860     KnownBits KnownScl;
861     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
862     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
863     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
864       return true;
865 
866     Known = KnownScl.zextOrTrunc(BitWidth, false);
867 
868     KnownBits KnownVec;
869     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
870                              Depth + 1))
871       return true;
872 
873     if (!!DemandedVecElts) {
874       Known.One &= KnownVec.One;
875       Known.Zero &= KnownVec.Zero;
876     }
877 
878     return false;
879   }
880   case ISD::INSERT_SUBVECTOR: {
881     SDValue Base = Op.getOperand(0);
882     SDValue Sub = Op.getOperand(1);
883     EVT SubVT = Sub.getValueType();
884     unsigned NumSubElts = SubVT.getVectorNumElements();
885 
886     // If index isn't constant, assume we need the original demanded base
887     // elements and ALL the inserted subvector elements.
888     APInt BaseElts = DemandedElts;
889     APInt SubElts = APInt::getAllOnesValue(NumSubElts);
890     if (isa<ConstantSDNode>(Op.getOperand(2))) {
891       const APInt &Idx = Op.getConstantOperandAPInt(2);
892       if (Idx.ule(NumElts - NumSubElts)) {
893         unsigned SubIdx = Idx.getZExtValue();
894         SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
895         BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
896       }
897     }
898 
899     KnownBits KnownSub, KnownBase;
900     if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
901                              Depth + 1))
902       return true;
903     if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
904                              Depth + 1))
905       return true;
906 
907     Known.Zero.setAllBits();
908     Known.One.setAllBits();
909     if (!!SubElts) {
910         Known.One &= KnownSub.One;
911         Known.Zero &= KnownSub.Zero;
912     }
913     if (!!BaseElts) {
914         Known.One &= KnownBase.One;
915         Known.Zero &= KnownBase.Zero;
916     }
917     break;
918   }
919   case ISD::EXTRACT_SUBVECTOR: {
920     // If index isn't constant, assume we need all the source vector elements.
921     SDValue Src = Op.getOperand(0);
922     ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
923     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
924     APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
925     if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
926       // Offset the demanded elts by the subvector index.
927       uint64_t Idx = SubIdx->getZExtValue();
928       SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
929     }
930     if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
931       return true;
932     break;
933   }
934   case ISD::CONCAT_VECTORS: {
935     Known.Zero.setAllBits();
936     Known.One.setAllBits();
937     EVT SubVT = Op.getOperand(0).getValueType();
938     unsigned NumSubVecs = Op.getNumOperands();
939     unsigned NumSubElts = SubVT.getVectorNumElements();
940     for (unsigned i = 0; i != NumSubVecs; ++i) {
941       APInt DemandedSubElts =
942           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
943       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
944                                Known2, TLO, Depth + 1))
945         return true;
946       // Known bits are shared by every demanded subvector element.
947       if (!!DemandedSubElts) {
948         Known.One &= Known2.One;
949         Known.Zero &= Known2.Zero;
950       }
951     }
952     break;
953   }
954   case ISD::VECTOR_SHUFFLE: {
955     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
956 
957     // Collect demanded elements from shuffle operands..
958     APInt DemandedLHS(NumElts, 0);
959     APInt DemandedRHS(NumElts, 0);
960     for (unsigned i = 0; i != NumElts; ++i) {
961       if (!DemandedElts[i])
962         continue;
963       int M = ShuffleMask[i];
964       if (M < 0) {
965         // For UNDEF elements, we don't know anything about the common state of
966         // the shuffle result.
967         DemandedLHS.clearAllBits();
968         DemandedRHS.clearAllBits();
969         break;
970       }
971       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
972       if (M < (int)NumElts)
973         DemandedLHS.setBit(M);
974       else
975         DemandedRHS.setBit(M - NumElts);
976     }
977 
978     if (!!DemandedLHS || !!DemandedRHS) {
979       SDValue Op0 = Op.getOperand(0);
980       SDValue Op1 = Op.getOperand(1);
981 
982       Known.Zero.setAllBits();
983       Known.One.setAllBits();
984       if (!!DemandedLHS) {
985         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
986                                  Depth + 1))
987           return true;
988         Known.One &= Known2.One;
989         Known.Zero &= Known2.Zero;
990       }
991       if (!!DemandedRHS) {
992         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
993                                  Depth + 1))
994           return true;
995         Known.One &= Known2.One;
996         Known.Zero &= Known2.Zero;
997       }
998 
999       // Attempt to avoid multi-use ops if we don't need anything from them.
1000       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1001           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1002       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1003           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1004       if (DemandedOp0 || DemandedOp1) {
1005         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1006         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1007         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1008         return TLO.CombineTo(Op, NewOp);
1009       }
1010     }
1011     break;
1012   }
1013   case ISD::AND: {
1014     SDValue Op0 = Op.getOperand(0);
1015     SDValue Op1 = Op.getOperand(1);
1016 
1017     // If the RHS is a constant, check to see if the LHS would be zero without
1018     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1019     // simplify the LHS, here we're using information from the LHS to simplify
1020     // the RHS.
1021     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1022       // Do not increment Depth here; that can cause an infinite loop.
1023       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1024       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1025       if ((LHSKnown.Zero & DemandedBits) ==
1026           (~RHSC->getAPIntValue() & DemandedBits))
1027         return TLO.CombineTo(Op, Op0);
1028 
1029       // If any of the set bits in the RHS are known zero on the LHS, shrink
1030       // the constant.
1031       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
1032         return true;
1033 
1034       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1035       // constant, but if this 'and' is only clearing bits that were just set by
1036       // the xor, then this 'and' can be eliminated by shrinking the mask of
1037       // the xor. For example, for a 32-bit X:
1038       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1039       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1040           LHSKnown.One == ~RHSC->getAPIntValue()) {
1041         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1042         return TLO.CombineTo(Op, Xor);
1043       }
1044     }
1045 
1046     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1047                              Depth + 1))
1048       return true;
1049     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1050     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1051                              Known2, TLO, Depth + 1))
1052       return true;
1053     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1054 
1055     // Attempt to avoid multi-use ops if we don't need anything from them.
1056     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1057       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1058           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1059       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1060           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1061       if (DemandedOp0 || DemandedOp1) {
1062         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1063         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1064         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1065         return TLO.CombineTo(Op, NewOp);
1066       }
1067     }
1068 
1069     // If all of the demanded bits are known one on one side, return the other.
1070     // These bits cannot contribute to the result of the 'and'.
1071     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1072       return TLO.CombineTo(Op, Op0);
1073     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1074       return TLO.CombineTo(Op, Op1);
1075     // If all of the demanded bits in the inputs are known zeros, return zero.
1076     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1077       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1078     // If the RHS is a constant, see if we can simplify it.
1079     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
1080       return true;
1081     // If the operation can be done in a smaller type, do so.
1082     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1083       return true;
1084 
1085     // Output known-1 bits are only known if set in both the LHS & RHS.
1086     Known.One &= Known2.One;
1087     // Output known-0 are known to be clear if zero in either the LHS | RHS.
1088     Known.Zero |= Known2.Zero;
1089     break;
1090   }
1091   case ISD::OR: {
1092     SDValue Op0 = Op.getOperand(0);
1093     SDValue Op1 = Op.getOperand(1);
1094 
1095     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1096                              Depth + 1))
1097       return true;
1098     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1099     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1100                              Known2, TLO, Depth + 1))
1101       return true;
1102     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1103 
1104     // Attempt to avoid multi-use ops if we don't need anything from them.
1105     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1106       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1107           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1108       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1109           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1110       if (DemandedOp0 || DemandedOp1) {
1111         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1112         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1113         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1114         return TLO.CombineTo(Op, NewOp);
1115       }
1116     }
1117 
1118     // If all of the demanded bits are known zero on one side, return the other.
1119     // These bits cannot contribute to the result of the 'or'.
1120     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1121       return TLO.CombineTo(Op, Op0);
1122     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1123       return TLO.CombineTo(Op, Op1);
1124     // If the RHS is a constant, see if we can simplify it.
1125     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1126       return true;
1127     // If the operation can be done in a smaller type, do so.
1128     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1129       return true;
1130 
1131     // Output known-0 bits are only known if clear in both the LHS & RHS.
1132     Known.Zero &= Known2.Zero;
1133     // Output known-1 are known to be set if set in either the LHS | RHS.
1134     Known.One |= Known2.One;
1135     break;
1136   }
1137   case ISD::XOR: {
1138     SDValue Op0 = Op.getOperand(0);
1139     SDValue Op1 = Op.getOperand(1);
1140 
1141     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1142                              Depth + 1))
1143       return true;
1144     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1145     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1146                              Depth + 1))
1147       return true;
1148     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1149 
1150     // Attempt to avoid multi-use ops if we don't need anything from them.
1151     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1152       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1153           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1154       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1155           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1156       if (DemandedOp0 || DemandedOp1) {
1157         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1158         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1159         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1160         return TLO.CombineTo(Op, NewOp);
1161       }
1162     }
1163 
1164     // If all of the demanded bits are known zero on one side, return the other.
1165     // These bits cannot contribute to the result of the 'xor'.
1166     if (DemandedBits.isSubsetOf(Known.Zero))
1167       return TLO.CombineTo(Op, Op0);
1168     if (DemandedBits.isSubsetOf(Known2.Zero))
1169       return TLO.CombineTo(Op, Op1);
1170     // If the operation can be done in a smaller type, do so.
1171     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1172       return true;
1173 
1174     // If all of the unknown bits are known to be zero on one side or the other
1175     // (but not both) turn this into an *inclusive* or.
1176     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1177     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1178       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1179 
1180     // Output known-0 bits are known if clear or set in both the LHS & RHS.
1181     KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
1182     // Output known-1 are known to be set if set in only one of the LHS, RHS.
1183     KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
1184 
1185     if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
1186       // If one side is a constant, and all of the known set bits on the other
1187       // side are also set in the constant, turn this into an AND, as we know
1188       // the bits will be cleared.
1189       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1190       // NB: it is okay if more bits are known than are requested
1191       if (C->getAPIntValue() == Known2.One) {
1192         SDValue ANDC =
1193             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1194         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1195       }
1196 
1197       // If the RHS is a constant, see if we can change it. Don't alter a -1
1198       // constant because that's a 'not' op, and that is better for combining
1199       // and codegen.
1200       if (!C->isAllOnesValue()) {
1201         if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
1202           // We're flipping all demanded bits. Flip the undemanded bits too.
1203           SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1204           return TLO.CombineTo(Op, New);
1205         }
1206         // If we can't turn this into a 'not', try to shrink the constant.
1207         if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1208           return true;
1209       }
1210     }
1211 
1212     Known = std::move(KnownOut);
1213     break;
1214   }
1215   case ISD::SELECT:
1216     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1217                              Depth + 1))
1218       return true;
1219     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1220                              Depth + 1))
1221       return true;
1222     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1223     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1224 
1225     // If the operands are constants, see if we can simplify them.
1226     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1227       return true;
1228 
1229     // Only known if known in both the LHS and RHS.
1230     Known.One &= Known2.One;
1231     Known.Zero &= Known2.Zero;
1232     break;
1233   case ISD::SELECT_CC:
1234     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1235                              Depth + 1))
1236       return true;
1237     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1238                              Depth + 1))
1239       return true;
1240     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1241     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1242 
1243     // If the operands are constants, see if we can simplify them.
1244     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1245       return true;
1246 
1247     // Only known if known in both the LHS and RHS.
1248     Known.One &= Known2.One;
1249     Known.Zero &= Known2.Zero;
1250     break;
1251   case ISD::SETCC: {
1252     SDValue Op0 = Op.getOperand(0);
1253     SDValue Op1 = Op.getOperand(1);
1254     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1255     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1256     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1257     // -1, we may be able to bypass the setcc.
1258     if (DemandedBits.isSignMask() &&
1259         Op0.getScalarValueSizeInBits() == BitWidth &&
1260         getBooleanContents(VT) ==
1261             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1262       // If we're testing X < 0, then this compare isn't needed - just use X!
1263       // FIXME: We're limiting to integer types here, but this should also work
1264       // if we don't care about FP signed-zero. The use of SETLT with FP means
1265       // that we don't care about NaNs.
1266       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1267           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1268         return TLO.CombineTo(Op, Op0);
1269 
1270       // TODO: Should we check for other forms of sign-bit comparisons?
1271       // Examples: X <= -1, X >= 0
1272     }
1273     if (getBooleanContents(Op0.getValueType()) ==
1274             TargetLowering::ZeroOrOneBooleanContent &&
1275         BitWidth > 1)
1276       Known.Zero.setBitsFrom(1);
1277     break;
1278   }
1279   case ISD::SHL: {
1280     SDValue Op0 = Op.getOperand(0);
1281     SDValue Op1 = Op.getOperand(1);
1282 
1283     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1284       // If the shift count is an invalid immediate, don't do anything.
1285       if (SA->getAPIntValue().uge(BitWidth))
1286         break;
1287 
1288       unsigned ShAmt = SA->getZExtValue();
1289       if (ShAmt == 0)
1290         return TLO.CombineTo(Op, Op0);
1291 
1292       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1293       // single shift.  We can do this if the bottom bits (which are shifted
1294       // out) are never demanded.
1295       // TODO - support non-uniform vector amounts.
1296       if (Op0.getOpcode() == ISD::SRL) {
1297         if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1298           if (ConstantSDNode *SA2 =
1299                   isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1300             if (SA2->getAPIntValue().ult(BitWidth)) {
1301               unsigned C1 = SA2->getZExtValue();
1302               unsigned Opc = ISD::SHL;
1303               int Diff = ShAmt - C1;
1304               if (Diff < 0) {
1305                 Diff = -Diff;
1306                 Opc = ISD::SRL;
1307               }
1308 
1309               SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
1310               return TLO.CombineTo(
1311                   Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1312             }
1313           }
1314         }
1315       }
1316 
1317       if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
1318                                Known, TLO, Depth + 1))
1319         return true;
1320 
1321       // Try shrinking the operation as long as the shift amount will still be
1322       // in range.
1323       if ((ShAmt < DemandedBits.getActiveBits()) &&
1324           ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1325         return true;
1326 
1327       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1328       // are not demanded. This will likely allow the anyext to be folded away.
1329       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1330         SDValue InnerOp = Op0.getOperand(0);
1331         EVT InnerVT = InnerOp.getValueType();
1332         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1333         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1334             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1335           EVT ShTy = getShiftAmountTy(InnerVT, DL);
1336           if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1337             ShTy = InnerVT;
1338           SDValue NarrowShl =
1339               TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1340                               TLO.DAG.getConstant(ShAmt, dl, ShTy));
1341           return TLO.CombineTo(
1342               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1343         }
1344         // Repeat the SHL optimization above in cases where an extension
1345         // intervenes: (shl (anyext (shr x, c1)), c2) to
1346         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1347         // aren't demanded (as above) and that the shifted upper c1 bits of
1348         // x aren't demanded.
1349         if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
1350             InnerOp.hasOneUse()) {
1351           if (ConstantSDNode *SA2 =
1352                   isConstOrConstSplat(InnerOp.getOperand(1))) {
1353             unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
1354             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1355                 DemandedBits.getActiveBits() <=
1356                     (InnerBits - InnerShAmt + ShAmt) &&
1357                 DemandedBits.countTrailingZeros() >= ShAmt) {
1358               SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
1359                                                   Op1.getValueType());
1360               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1361                                                InnerOp.getOperand(0));
1362               return TLO.CombineTo(
1363                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1364             }
1365           }
1366         }
1367       }
1368 
1369       Known.Zero <<= ShAmt;
1370       Known.One <<= ShAmt;
1371       // low bits known zero.
1372       Known.Zero.setLowBits(ShAmt);
1373     }
1374     break;
1375   }
1376   case ISD::SRL: {
1377     SDValue Op0 = Op.getOperand(0);
1378     SDValue Op1 = Op.getOperand(1);
1379 
1380     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1381       // If the shift count is an invalid immediate, don't do anything.
1382       if (SA->getAPIntValue().uge(BitWidth))
1383         break;
1384 
1385       unsigned ShAmt = SA->getZExtValue();
1386       if (ShAmt == 0)
1387         return TLO.CombineTo(Op, Op0);
1388 
1389       EVT ShiftVT = Op1.getValueType();
1390       APInt InDemandedMask = (DemandedBits << ShAmt);
1391 
1392       // If the shift is exact, then it does demand the low bits (and knows that
1393       // they are zero).
1394       if (Op->getFlags().hasExact())
1395         InDemandedMask.setLowBits(ShAmt);
1396 
1397       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1398       // single shift.  We can do this if the top bits (which are shifted out)
1399       // are never demanded.
1400       // TODO - support non-uniform vector amounts.
1401       if (Op0.getOpcode() == ISD::SHL) {
1402         if (ConstantSDNode *SA2 =
1403                 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1404           if (!DemandedBits.intersects(
1405                   APInt::getHighBitsSet(BitWidth, ShAmt))) {
1406             if (SA2->getAPIntValue().ult(BitWidth)) {
1407               unsigned C1 = SA2->getZExtValue();
1408               unsigned Opc = ISD::SRL;
1409               int Diff = ShAmt - C1;
1410               if (Diff < 0) {
1411                 Diff = -Diff;
1412                 Opc = ISD::SHL;
1413               }
1414 
1415               SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1416               return TLO.CombineTo(
1417                   Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1418             }
1419           }
1420         }
1421       }
1422 
1423       // Compute the new bits that are at the top now.
1424       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1425                                Depth + 1))
1426         return true;
1427       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1428       Known.Zero.lshrInPlace(ShAmt);
1429       Known.One.lshrInPlace(ShAmt);
1430 
1431       Known.Zero.setHighBits(ShAmt); // High bits known zero.
1432     }
1433     break;
1434   }
1435   case ISD::SRA: {
1436     SDValue Op0 = Op.getOperand(0);
1437     SDValue Op1 = Op.getOperand(1);
1438 
1439     // If this is an arithmetic shift right and only the low-bit is set, we can
1440     // always convert this into a logical shr, even if the shift amount is
1441     // variable.  The low bit of the shift cannot be an input sign bit unless
1442     // the shift amount is >= the size of the datatype, which is undefined.
1443     if (DemandedBits.isOneValue())
1444       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1445 
1446     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1447       // If the shift count is an invalid immediate, don't do anything.
1448       if (SA->getAPIntValue().uge(BitWidth))
1449         break;
1450 
1451       unsigned ShAmt = SA->getZExtValue();
1452       if (ShAmt == 0)
1453         return TLO.CombineTo(Op, Op0);
1454 
1455       APInt InDemandedMask = (DemandedBits << ShAmt);
1456 
1457       // If the shift is exact, then it does demand the low bits (and knows that
1458       // they are zero).
1459       if (Op->getFlags().hasExact())
1460         InDemandedMask.setLowBits(ShAmt);
1461 
1462       // If any of the demanded bits are produced by the sign extension, we also
1463       // demand the input sign bit.
1464       if (DemandedBits.countLeadingZeros() < ShAmt)
1465         InDemandedMask.setSignBit();
1466 
1467       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1468                                Depth + 1))
1469         return true;
1470       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1471       Known.Zero.lshrInPlace(ShAmt);
1472       Known.One.lshrInPlace(ShAmt);
1473 
1474       // If the input sign bit is known to be zero, or if none of the top bits
1475       // are demanded, turn this into an unsigned shift right.
1476       if (Known.Zero[BitWidth - ShAmt - 1] ||
1477           DemandedBits.countLeadingZeros() >= ShAmt) {
1478         SDNodeFlags Flags;
1479         Flags.setExact(Op->getFlags().hasExact());
1480         return TLO.CombineTo(
1481             Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1482       }
1483 
1484       int Log2 = DemandedBits.exactLogBase2();
1485       if (Log2 >= 0) {
1486         // The bit must come from the sign.
1487         SDValue NewSA =
1488             TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
1489         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1490       }
1491 
1492       if (Known.One[BitWidth - ShAmt - 1])
1493         // New bits are known one.
1494         Known.One.setHighBits(ShAmt);
1495     }
1496     break;
1497   }
1498   case ISD::FSHL:
1499   case ISD::FSHR: {
1500     SDValue Op0 = Op.getOperand(0);
1501     SDValue Op1 = Op.getOperand(1);
1502     SDValue Op2 = Op.getOperand(2);
1503     bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1504 
1505     if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1506       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1507 
1508       // For fshl, 0-shift returns the 1st arg.
1509       // For fshr, 0-shift returns the 2nd arg.
1510       if (Amt == 0) {
1511         if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1512                                  Known, TLO, Depth + 1))
1513           return true;
1514         break;
1515       }
1516 
1517       // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1518       // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
1519       APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1520       APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1521       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1522                                Depth + 1))
1523         return true;
1524       if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1525                                Depth + 1))
1526         return true;
1527 
1528       Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
1529       Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
1530       Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1531       Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1532       Known.One |= Known2.One;
1533       Known.Zero |= Known2.Zero;
1534     }
1535     break;
1536   }
1537   case ISD::BITREVERSE: {
1538     SDValue Src = Op.getOperand(0);
1539     APInt DemandedSrcBits = DemandedBits.reverseBits();
1540     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1541                              Depth + 1))
1542       return true;
1543     Known.One = Known2.One.reverseBits();
1544     Known.Zero = Known2.Zero.reverseBits();
1545     break;
1546   }
1547   case ISD::BSWAP: {
1548     SDValue Src = Op.getOperand(0);
1549     APInt DemandedSrcBits = DemandedBits.byteSwap();
1550     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1551                              Depth + 1))
1552       return true;
1553     Known.One = Known2.One.byteSwap();
1554     Known.Zero = Known2.Zero.byteSwap();
1555     break;
1556   }
1557   case ISD::SIGN_EXTEND_INREG: {
1558     SDValue Op0 = Op.getOperand(0);
1559     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1560     unsigned ExVTBits = ExVT.getScalarSizeInBits();
1561 
1562     // If we only care about the highest bit, don't bother shifting right.
1563     if (DemandedBits.isSignMask()) {
1564       unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
1565       bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
1566       // However if the input is already sign extended we expect the sign
1567       // extension to be dropped altogether later and do not simplify.
1568       if (!AlreadySignExtended) {
1569         // Compute the correct shift amount type, which must be getShiftAmountTy
1570         // for scalar types after legalization.
1571         EVT ShiftAmtTy = VT;
1572         if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
1573           ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
1574 
1575         SDValue ShiftAmt =
1576             TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
1577         return TLO.CombineTo(Op,
1578                              TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
1579       }
1580     }
1581 
1582     // If none of the extended bits are demanded, eliminate the sextinreg.
1583     if (DemandedBits.getActiveBits() <= ExVTBits)
1584       return TLO.CombineTo(Op, Op0);
1585 
1586     APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
1587 
1588     // Since the sign extended bits are demanded, we know that the sign
1589     // bit is demanded.
1590     InputDemandedBits.setBit(ExVTBits - 1);
1591 
1592     if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
1593       return true;
1594     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1595 
1596     // If the sign bit of the input is known set or clear, then we know the
1597     // top bits of the result.
1598 
1599     // If the input sign bit is known zero, convert this into a zero extension.
1600     if (Known.Zero[ExVTBits - 1])
1601       return TLO.CombineTo(
1602           Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
1603 
1604     APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
1605     if (Known.One[ExVTBits - 1]) { // Input sign bit known set
1606       Known.One.setBitsFrom(ExVTBits);
1607       Known.Zero &= Mask;
1608     } else { // Input sign bit unknown
1609       Known.Zero &= Mask;
1610       Known.One &= Mask;
1611     }
1612     break;
1613   }
1614   case ISD::BUILD_PAIR: {
1615     EVT HalfVT = Op.getOperand(0).getValueType();
1616     unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
1617 
1618     APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
1619     APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
1620 
1621     KnownBits KnownLo, KnownHi;
1622 
1623     if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
1624       return true;
1625 
1626     if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
1627       return true;
1628 
1629     Known.Zero = KnownLo.Zero.zext(BitWidth) |
1630                  KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
1631 
1632     Known.One = KnownLo.One.zext(BitWidth) |
1633                 KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
1634     break;
1635   }
1636   case ISD::ZERO_EXTEND:
1637   case ISD::ZERO_EXTEND_VECTOR_INREG: {
1638     SDValue Src = Op.getOperand(0);
1639     EVT SrcVT = Src.getValueType();
1640     unsigned InBits = SrcVT.getScalarSizeInBits();
1641     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1642     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
1643 
1644     // If none of the top bits are demanded, convert this into an any_extend.
1645     if (DemandedBits.getActiveBits() <= InBits) {
1646       // If we only need the non-extended bits of the bottom element
1647       // then we can just bitcast to the result.
1648       if (IsVecInReg && DemandedElts == 1 &&
1649           VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1650           TLO.DAG.getDataLayout().isLittleEndian())
1651         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1652 
1653       unsigned Opc =
1654           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1655       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1656         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1657     }
1658 
1659     APInt InDemandedBits = DemandedBits.trunc(InBits);
1660     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1661     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1662                              Depth + 1))
1663       return true;
1664     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1665     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1666     Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
1667     break;
1668   }
1669   case ISD::SIGN_EXTEND:
1670   case ISD::SIGN_EXTEND_VECTOR_INREG: {
1671     SDValue Src = Op.getOperand(0);
1672     EVT SrcVT = Src.getValueType();
1673     unsigned InBits = SrcVT.getScalarSizeInBits();
1674     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1675     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
1676 
1677     // If none of the top bits are demanded, convert this into an any_extend.
1678     if (DemandedBits.getActiveBits() <= InBits) {
1679       // If we only need the non-extended bits of the bottom element
1680       // then we can just bitcast to the result.
1681       if (IsVecInReg && DemandedElts == 1 &&
1682           VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1683           TLO.DAG.getDataLayout().isLittleEndian())
1684         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1685 
1686       unsigned Opc =
1687           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1688       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1689         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1690     }
1691 
1692     APInt InDemandedBits = DemandedBits.trunc(InBits);
1693     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1694 
1695     // Since some of the sign extended bits are demanded, we know that the sign
1696     // bit is demanded.
1697     InDemandedBits.setBit(InBits - 1);
1698 
1699     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1700                              Depth + 1))
1701       return true;
1702     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1703     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1704 
1705     // If the sign bit is known one, the top bits match.
1706     Known = Known.sext(BitWidth);
1707 
1708     // If the sign bit is known zero, convert this to a zero extend.
1709     if (Known.isNonNegative()) {
1710       unsigned Opc =
1711           IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
1712       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1713         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1714     }
1715     break;
1716   }
1717   case ISD::ANY_EXTEND:
1718   case ISD::ANY_EXTEND_VECTOR_INREG: {
1719     SDValue Src = Op.getOperand(0);
1720     EVT SrcVT = Src.getValueType();
1721     unsigned InBits = SrcVT.getScalarSizeInBits();
1722     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1723     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
1724 
1725     // If we only need the bottom element then we can just bitcast.
1726     // TODO: Handle ANY_EXTEND?
1727     if (IsVecInReg && DemandedElts == 1 &&
1728         VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1729         TLO.DAG.getDataLayout().isLittleEndian())
1730       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1731 
1732     APInt InDemandedBits = DemandedBits.trunc(InBits);
1733     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1734     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1735                              Depth + 1))
1736       return true;
1737     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1738     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1739     Known = Known.zext(BitWidth, false /* => any extend */);
1740     break;
1741   }
1742   case ISD::TRUNCATE: {
1743     SDValue Src = Op.getOperand(0);
1744 
1745     // Simplify the input, using demanded bit information, and compute the known
1746     // zero/one bits live out.
1747     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
1748     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
1749     if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
1750       return true;
1751     Known = Known.trunc(BitWidth);
1752 
1753     // Attempt to avoid multi-use ops if we don't need anything from them.
1754     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1755             Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
1756       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
1757 
1758     // If the input is only used by this truncate, see if we can shrink it based
1759     // on the known demanded bits.
1760     if (Src.getNode()->hasOneUse()) {
1761       switch (Src.getOpcode()) {
1762       default:
1763         break;
1764       case ISD::SRL:
1765         // Shrink SRL by a constant if none of the high bits shifted in are
1766         // demanded.
1767         if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
1768           // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
1769           // undesirable.
1770           break;
1771 
1772         auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1773         if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth))
1774           break;
1775 
1776         SDValue Shift = Src.getOperand(1);
1777         uint64_t ShVal = ShAmt->getZExtValue();
1778 
1779         if (TLO.LegalTypes())
1780           Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
1781 
1782         APInt HighBits =
1783             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
1784         HighBits.lshrInPlace(ShVal);
1785         HighBits = HighBits.trunc(BitWidth);
1786 
1787         if (!(HighBits & DemandedBits)) {
1788           // None of the shifted in bits are needed.  Add a truncate of the
1789           // shift input, then shift it.
1790           SDValue NewTrunc =
1791               TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
1792           return TLO.CombineTo(
1793               Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
1794         }
1795         break;
1796       }
1797     }
1798 
1799     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1800     break;
1801   }
1802   case ISD::AssertZext: {
1803     // AssertZext demands all of the high bits, plus any of the low bits
1804     // demanded by its users.
1805     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1806     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
1807     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
1808                              TLO, Depth + 1))
1809       return true;
1810     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1811 
1812     Known.Zero |= ~InMask;
1813     break;
1814   }
1815   case ISD::EXTRACT_VECTOR_ELT: {
1816     SDValue Src = Op.getOperand(0);
1817     SDValue Idx = Op.getOperand(1);
1818     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1819     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
1820 
1821     // Demand the bits from every vector element without a constant index.
1822     APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
1823     if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
1824       if (CIdx->getAPIntValue().ult(NumSrcElts))
1825         DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
1826 
1827     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
1828     // anything about the extended bits.
1829     APInt DemandedSrcBits = DemandedBits;
1830     if (BitWidth > EltBitWidth)
1831       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
1832 
1833     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
1834                              Depth + 1))
1835       return true;
1836 
1837     Known = Known2;
1838     if (BitWidth > EltBitWidth)
1839       Known = Known.zext(BitWidth, false /* => any extend */);
1840     break;
1841   }
1842   case ISD::BITCAST: {
1843     SDValue Src = Op.getOperand(0);
1844     EVT SrcVT = Src.getValueType();
1845     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
1846 
1847     // If this is an FP->Int bitcast and if the sign bit is the only
1848     // thing demanded, turn this into a FGETSIGN.
1849     if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
1850         DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
1851         SrcVT.isFloatingPoint()) {
1852       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
1853       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
1854       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
1855           SrcVT != MVT::f128) {
1856         // Cannot eliminate/lower SHL for f128 yet.
1857         EVT Ty = OpVTLegal ? VT : MVT::i32;
1858         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
1859         // place.  We expect the SHL to be eliminated by other optimizations.
1860         SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
1861         unsigned OpVTSizeInBits = Op.getValueSizeInBits();
1862         if (!OpVTLegal && OpVTSizeInBits > 32)
1863           Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
1864         unsigned ShVal = Op.getValueSizeInBits() - 1;
1865         SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
1866         return TLO.CombineTo(Op,
1867                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
1868       }
1869     }
1870 
1871     // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
1872     // Demand the elt/bit if any of the original elts/bits are demanded.
1873     // TODO - bigendian once we have test coverage.
1874     if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
1875         TLO.DAG.getDataLayout().isLittleEndian()) {
1876       unsigned Scale = BitWidth / NumSrcEltBits;
1877       unsigned NumSrcElts = SrcVT.getVectorNumElements();
1878       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1879       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
1880       for (unsigned i = 0; i != Scale; ++i) {
1881         unsigned Offset = i * NumSrcEltBits;
1882         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
1883         if (!Sub.isNullValue()) {
1884           DemandedSrcBits |= Sub;
1885           for (unsigned j = 0; j != NumElts; ++j)
1886             if (DemandedElts[j])
1887               DemandedSrcElts.setBit((j * Scale) + i);
1888         }
1889       }
1890 
1891       APInt KnownSrcUndef, KnownSrcZero;
1892       if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1893                                      KnownSrcZero, TLO, Depth + 1))
1894         return true;
1895 
1896       KnownBits KnownSrcBits;
1897       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1898                                KnownSrcBits, TLO, Depth + 1))
1899         return true;
1900     } else if ((NumSrcEltBits % BitWidth) == 0 &&
1901                TLO.DAG.getDataLayout().isLittleEndian()) {
1902       unsigned Scale = NumSrcEltBits / BitWidth;
1903       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1904       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1905       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
1906       for (unsigned i = 0; i != NumElts; ++i)
1907         if (DemandedElts[i]) {
1908           unsigned Offset = (i % Scale) * BitWidth;
1909           DemandedSrcBits.insertBits(DemandedBits, Offset);
1910           DemandedSrcElts.setBit(i / Scale);
1911         }
1912 
1913       if (SrcVT.isVector()) {
1914         APInt KnownSrcUndef, KnownSrcZero;
1915         if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1916                                        KnownSrcZero, TLO, Depth + 1))
1917           return true;
1918       }
1919 
1920       KnownBits KnownSrcBits;
1921       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1922                                KnownSrcBits, TLO, Depth + 1))
1923         return true;
1924     }
1925 
1926     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
1927     // recursive call where Known may be useful to the caller.
1928     if (Depth > 0) {
1929       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1930       return false;
1931     }
1932     break;
1933   }
1934   case ISD::ADD:
1935   case ISD::MUL:
1936   case ISD::SUB: {
1937     // Add, Sub, and Mul don't demand any bits in positions beyond that
1938     // of the highest bit demanded of them.
1939     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
1940     SDNodeFlags Flags = Op.getNode()->getFlags();
1941     unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
1942     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
1943     if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
1944                              Depth + 1) ||
1945         SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
1946                              Depth + 1) ||
1947         // See if the operation should be performed at a smaller bit width.
1948         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
1949       if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1950         // Disable the nsw and nuw flags. We can no longer guarantee that we
1951         // won't wrap after simplification.
1952         Flags.setNoSignedWrap(false);
1953         Flags.setNoUnsignedWrap(false);
1954         SDValue NewOp =
1955             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
1956         return TLO.CombineTo(Op, NewOp);
1957       }
1958       return true;
1959     }
1960 
1961     // Attempt to avoid multi-use ops if we don't need anything from them.
1962     if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1963       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1964           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
1965       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1966           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
1967       if (DemandedOp0 || DemandedOp1) {
1968         Flags.setNoSignedWrap(false);
1969         Flags.setNoUnsignedWrap(false);
1970         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1971         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1972         SDValue NewOp =
1973             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
1974         return TLO.CombineTo(Op, NewOp);
1975       }
1976     }
1977 
1978     // If we have a constant operand, we may be able to turn it into -1 if we
1979     // do not demand the high bits. This can make the constant smaller to
1980     // encode, allow more general folding, or match specialized instruction
1981     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
1982     // is probably not useful (and could be detrimental).
1983     ConstantSDNode *C = isConstOrConstSplat(Op1);
1984     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
1985     if (C && !C->isAllOnesValue() && !C->isOne() &&
1986         (C->getAPIntValue() | HighMask).isAllOnesValue()) {
1987       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
1988       // Disable the nsw and nuw flags. We can no longer guarantee that we
1989       // won't wrap after simplification.
1990       Flags.setNoSignedWrap(false);
1991       Flags.setNoUnsignedWrap(false);
1992       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
1993       return TLO.CombineTo(Op, NewOp);
1994     }
1995 
1996     LLVM_FALLTHROUGH;
1997   }
1998   default:
1999     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2000       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2001                                             Known, TLO, Depth))
2002         return true;
2003       break;
2004     }
2005 
2006     // Just use computeKnownBits to compute output bits.
2007     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2008     break;
2009   }
2010 
2011   // If we know the value of all of the demanded bits, return this as a
2012   // constant.
2013   if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2014     // Avoid folding to a constant if any OpaqueConstant is involved.
2015     const SDNode *N = Op.getNode();
2016     for (SDNodeIterator I = SDNodeIterator::begin(N),
2017                         E = SDNodeIterator::end(N);
2018          I != E; ++I) {
2019       SDNode *Op = *I;
2020       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2021         if (C->isOpaque())
2022           return false;
2023     }
2024     // TODO: Handle float bits as well.
2025     if (VT.isInteger())
2026       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2027   }
2028 
2029   return false;
2030 }
2031 
2032 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2033                                                 const APInt &DemandedElts,
2034                                                 APInt &KnownUndef,
2035                                                 APInt &KnownZero,
2036                                                 DAGCombinerInfo &DCI) const {
2037   SelectionDAG &DAG = DCI.DAG;
2038   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2039                         !DCI.isBeforeLegalizeOps());
2040 
2041   bool Simplified =
2042       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2043   if (Simplified) {
2044     DCI.AddToWorklist(Op.getNode());
2045     DCI.CommitTargetLoweringOpt(TLO);
2046   }
2047 
2048   return Simplified;
2049 }
2050 
2051 /// Given a vector binary operation and known undefined elements for each input
2052 /// operand, compute whether each element of the output is undefined.
2053 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2054                                          const APInt &UndefOp0,
2055                                          const APInt &UndefOp1) {
2056   EVT VT = BO.getValueType();
2057   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2058          "Vector binop only");
2059 
2060   EVT EltVT = VT.getVectorElementType();
2061   unsigned NumElts = VT.getVectorNumElements();
2062   assert(UndefOp0.getBitWidth() == NumElts &&
2063          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2064 
2065   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2066                                    const APInt &UndefVals) {
2067     if (UndefVals[Index])
2068       return DAG.getUNDEF(EltVT);
2069 
2070     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2071       // Try hard to make sure that the getNode() call is not creating temporary
2072       // nodes. Ignore opaque integers because they do not constant fold.
2073       SDValue Elt = BV->getOperand(Index);
2074       auto *C = dyn_cast<ConstantSDNode>(Elt);
2075       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2076         return Elt;
2077     }
2078 
2079     return SDValue();
2080   };
2081 
2082   APInt KnownUndef = APInt::getNullValue(NumElts);
2083   for (unsigned i = 0; i != NumElts; ++i) {
2084     // If both inputs for this element are either constant or undef and match
2085     // the element type, compute the constant/undef result for this element of
2086     // the vector.
2087     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2088     // not handle FP constants. The code within getNode() should be refactored
2089     // to avoid the danger of creating a bogus temporary node here.
2090     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2091     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2092     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2093       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2094         KnownUndef.setBit(i);
2095   }
2096   return KnownUndef;
2097 }
2098 
2099 bool TargetLowering::SimplifyDemandedVectorElts(
2100     SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2101     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2102     bool AssumeSingleUse) const {
2103   EVT VT = Op.getValueType();
2104   APInt DemandedElts = OriginalDemandedElts;
2105   unsigned NumElts = DemandedElts.getBitWidth();
2106   assert(VT.isVector() && "Expected vector op");
2107   assert(VT.getVectorNumElements() == NumElts &&
2108          "Mask size mismatches value type element count!");
2109 
2110   KnownUndef = KnownZero = APInt::getNullValue(NumElts);
2111 
2112   // Undef operand.
2113   if (Op.isUndef()) {
2114     KnownUndef.setAllBits();
2115     return false;
2116   }
2117 
2118   // If Op has other users, assume that all elements are needed.
2119   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
2120     DemandedElts.setAllBits();
2121 
2122   // Not demanding any elements from Op.
2123   if (DemandedElts == 0) {
2124     KnownUndef.setAllBits();
2125     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2126   }
2127 
2128   // Limit search depth.
2129   if (Depth >= SelectionDAG::MaxRecursionDepth)
2130     return false;
2131 
2132   SDLoc DL(Op);
2133   unsigned EltSizeInBits = VT.getScalarSizeInBits();
2134 
2135   switch (Op.getOpcode()) {
2136   case ISD::SCALAR_TO_VECTOR: {
2137     if (!DemandedElts[0]) {
2138       KnownUndef.setAllBits();
2139       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2140     }
2141     KnownUndef.setHighBits(NumElts - 1);
2142     break;
2143   }
2144   case ISD::BITCAST: {
2145     SDValue Src = Op.getOperand(0);
2146     EVT SrcVT = Src.getValueType();
2147 
2148     // We only handle vectors here.
2149     // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2150     if (!SrcVT.isVector())
2151       break;
2152 
2153     // Fast handling of 'identity' bitcasts.
2154     unsigned NumSrcElts = SrcVT.getVectorNumElements();
2155     if (NumSrcElts == NumElts)
2156       return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2157                                         KnownZero, TLO, Depth + 1);
2158 
2159     APInt SrcZero, SrcUndef;
2160     APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
2161 
2162     // Bitcast from 'large element' src vector to 'small element' vector, we
2163     // must demand a source element if any DemandedElt maps to it.
2164     if ((NumElts % NumSrcElts) == 0) {
2165       unsigned Scale = NumElts / NumSrcElts;
2166       for (unsigned i = 0; i != NumElts; ++i)
2167         if (DemandedElts[i])
2168           SrcDemandedElts.setBit(i / Scale);
2169 
2170       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2171                                      TLO, Depth + 1))
2172         return true;
2173 
2174       // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2175       // of the large element.
2176       // TODO - bigendian once we have test coverage.
2177       if (TLO.DAG.getDataLayout().isLittleEndian()) {
2178         unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2179         APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
2180         for (unsigned i = 0; i != NumElts; ++i)
2181           if (DemandedElts[i]) {
2182             unsigned Ofs = (i % Scale) * EltSizeInBits;
2183             SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2184           }
2185 
2186         KnownBits Known;
2187         if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
2188           return true;
2189       }
2190 
2191       // If the src element is zero/undef then all the output elements will be -
2192       // only demanded elements are guaranteed to be correct.
2193       for (unsigned i = 0; i != NumSrcElts; ++i) {
2194         if (SrcDemandedElts[i]) {
2195           if (SrcZero[i])
2196             KnownZero.setBits(i * Scale, (i + 1) * Scale);
2197           if (SrcUndef[i])
2198             KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2199         }
2200       }
2201     }
2202 
2203     // Bitcast from 'small element' src vector to 'large element' vector, we
2204     // demand all smaller source elements covered by the larger demanded element
2205     // of this vector.
2206     if ((NumSrcElts % NumElts) == 0) {
2207       unsigned Scale = NumSrcElts / NumElts;
2208       for (unsigned i = 0; i != NumElts; ++i)
2209         if (DemandedElts[i])
2210           SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
2211 
2212       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2213                                      TLO, Depth + 1))
2214         return true;
2215 
2216       // If all the src elements covering an output element are zero/undef, then
2217       // the output element will be as well, assuming it was demanded.
2218       for (unsigned i = 0; i != NumElts; ++i) {
2219         if (DemandedElts[i]) {
2220           if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
2221             KnownZero.setBit(i);
2222           if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
2223             KnownUndef.setBit(i);
2224         }
2225       }
2226     }
2227     break;
2228   }
2229   case ISD::BUILD_VECTOR: {
2230     // Check all elements and simplify any unused elements with UNDEF.
2231     if (!DemandedElts.isAllOnesValue()) {
2232       // Don't simplify BROADCASTS.
2233       if (llvm::any_of(Op->op_values(),
2234                        [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2235         SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2236         bool Updated = false;
2237         for (unsigned i = 0; i != NumElts; ++i) {
2238           if (!DemandedElts[i] && !Ops[i].isUndef()) {
2239             Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2240             KnownUndef.setBit(i);
2241             Updated = true;
2242           }
2243         }
2244         if (Updated)
2245           return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
2246       }
2247     }
2248     for (unsigned i = 0; i != NumElts; ++i) {
2249       SDValue SrcOp = Op.getOperand(i);
2250       if (SrcOp.isUndef()) {
2251         KnownUndef.setBit(i);
2252       } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
2253                  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
2254         KnownZero.setBit(i);
2255       }
2256     }
2257     break;
2258   }
2259   case ISD::CONCAT_VECTORS: {
2260     EVT SubVT = Op.getOperand(0).getValueType();
2261     unsigned NumSubVecs = Op.getNumOperands();
2262     unsigned NumSubElts = SubVT.getVectorNumElements();
2263     for (unsigned i = 0; i != NumSubVecs; ++i) {
2264       SDValue SubOp = Op.getOperand(i);
2265       APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
2266       APInt SubUndef, SubZero;
2267       if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
2268                                      Depth + 1))
2269         return true;
2270       KnownUndef.insertBits(SubUndef, i * NumSubElts);
2271       KnownZero.insertBits(SubZero, i * NumSubElts);
2272     }
2273     break;
2274   }
2275   case ISD::INSERT_SUBVECTOR: {
2276     if (!isa<ConstantSDNode>(Op.getOperand(2)))
2277       break;
2278     SDValue Base = Op.getOperand(0);
2279     SDValue Sub = Op.getOperand(1);
2280     EVT SubVT = Sub.getValueType();
2281     unsigned NumSubElts = SubVT.getVectorNumElements();
2282     const APInt &Idx = Op.getConstantOperandAPInt(2);
2283     if (Idx.ugt(NumElts - NumSubElts))
2284       break;
2285     unsigned SubIdx = Idx.getZExtValue();
2286     APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
2287     APInt SubUndef, SubZero;
2288     if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
2289                                    Depth + 1))
2290       return true;
2291     APInt BaseElts = DemandedElts;
2292     BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
2293 
2294     // If none of the base operand elements are demanded, replace it with undef.
2295     if (!BaseElts && !Base.isUndef())
2296       return TLO.CombineTo(Op,
2297                            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
2298                                            TLO.DAG.getUNDEF(VT),
2299                                            Op.getOperand(1),
2300                                            Op.getOperand(2)));
2301 
2302     if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
2303                                    Depth + 1))
2304       return true;
2305     KnownUndef.insertBits(SubUndef, SubIdx);
2306     KnownZero.insertBits(SubZero, SubIdx);
2307     break;
2308   }
2309   case ISD::EXTRACT_SUBVECTOR: {
2310     SDValue Src = Op.getOperand(0);
2311     ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2312     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2313     if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
2314       // Offset the demanded elts by the subvector index.
2315       uint64_t Idx = SubIdx->getZExtValue();
2316       APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
2317       APInt SrcUndef, SrcZero;
2318       if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
2319                                      Depth + 1))
2320         return true;
2321       KnownUndef = SrcUndef.extractBits(NumElts, Idx);
2322       KnownZero = SrcZero.extractBits(NumElts, Idx);
2323     }
2324     break;
2325   }
2326   case ISD::INSERT_VECTOR_ELT: {
2327     SDValue Vec = Op.getOperand(0);
2328     SDValue Scl = Op.getOperand(1);
2329     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2330 
2331     // For a legal, constant insertion index, if we don't need this insertion
2332     // then strip it, else remove it from the demanded elts.
2333     if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
2334       unsigned Idx = CIdx->getZExtValue();
2335       if (!DemandedElts[Idx])
2336         return TLO.CombineTo(Op, Vec);
2337 
2338       APInt DemandedVecElts(DemandedElts);
2339       DemandedVecElts.clearBit(Idx);
2340       if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
2341                                      KnownZero, TLO, Depth + 1))
2342         return true;
2343 
2344       KnownUndef.clearBit(Idx);
2345       if (Scl.isUndef())
2346         KnownUndef.setBit(Idx);
2347 
2348       KnownZero.clearBit(Idx);
2349       if (isNullConstant(Scl) || isNullFPConstant(Scl))
2350         KnownZero.setBit(Idx);
2351       break;
2352     }
2353 
2354     APInt VecUndef, VecZero;
2355     if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
2356                                    Depth + 1))
2357       return true;
2358     // Without knowing the insertion index we can't set KnownUndef/KnownZero.
2359     break;
2360   }
2361   case ISD::VSELECT: {
2362     // Try to transform the select condition based on the current demanded
2363     // elements.
2364     // TODO: If a condition element is undef, we can choose from one arm of the
2365     //       select (and if one arm is undef, then we can propagate that to the
2366     //       result).
2367     // TODO - add support for constant vselect masks (see IR version of this).
2368     APInt UnusedUndef, UnusedZero;
2369     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
2370                                    UnusedZero, TLO, Depth + 1))
2371       return true;
2372 
2373     // See if we can simplify either vselect operand.
2374     APInt DemandedLHS(DemandedElts);
2375     APInt DemandedRHS(DemandedElts);
2376     APInt UndefLHS, ZeroLHS;
2377     APInt UndefRHS, ZeroRHS;
2378     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
2379                                    ZeroLHS, TLO, Depth + 1))
2380       return true;
2381     if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
2382                                    ZeroRHS, TLO, Depth + 1))
2383       return true;
2384 
2385     KnownUndef = UndefLHS & UndefRHS;
2386     KnownZero = ZeroLHS & ZeroRHS;
2387     break;
2388   }
2389   case ISD::VECTOR_SHUFFLE: {
2390     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
2391 
2392     // Collect demanded elements from shuffle operands..
2393     APInt DemandedLHS(NumElts, 0);
2394     APInt DemandedRHS(NumElts, 0);
2395     for (unsigned i = 0; i != NumElts; ++i) {
2396       int M = ShuffleMask[i];
2397       if (M < 0 || !DemandedElts[i])
2398         continue;
2399       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
2400       if (M < (int)NumElts)
2401         DemandedLHS.setBit(M);
2402       else
2403         DemandedRHS.setBit(M - NumElts);
2404     }
2405 
2406     // See if we can simplify either shuffle operand.
2407     APInt UndefLHS, ZeroLHS;
2408     APInt UndefRHS, ZeroRHS;
2409     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
2410                                    ZeroLHS, TLO, Depth + 1))
2411       return true;
2412     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
2413                                    ZeroRHS, TLO, Depth + 1))
2414       return true;
2415 
2416     // Simplify mask using undef elements from LHS/RHS.
2417     bool Updated = false;
2418     bool IdentityLHS = true, IdentityRHS = true;
2419     SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
2420     for (unsigned i = 0; i != NumElts; ++i) {
2421       int &M = NewMask[i];
2422       if (M < 0)
2423         continue;
2424       if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
2425           (M >= (int)NumElts && UndefRHS[M - NumElts])) {
2426         Updated = true;
2427         M = -1;
2428       }
2429       IdentityLHS &= (M < 0) || (M == (int)i);
2430       IdentityRHS &= (M < 0) || ((M - NumElts) == i);
2431     }
2432 
2433     // Update legal shuffle masks based on demanded elements if it won't reduce
2434     // to Identity which can cause premature removal of the shuffle mask.
2435     if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
2436       SDValue LegalShuffle =
2437           buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
2438                                   NewMask, TLO.DAG);
2439       if (LegalShuffle)
2440         return TLO.CombineTo(Op, LegalShuffle);
2441     }
2442 
2443     // Propagate undef/zero elements from LHS/RHS.
2444     for (unsigned i = 0; i != NumElts; ++i) {
2445       int M = ShuffleMask[i];
2446       if (M < 0) {
2447         KnownUndef.setBit(i);
2448       } else if (M < (int)NumElts) {
2449         if (UndefLHS[M])
2450           KnownUndef.setBit(i);
2451         if (ZeroLHS[M])
2452           KnownZero.setBit(i);
2453       } else {
2454         if (UndefRHS[M - NumElts])
2455           KnownUndef.setBit(i);
2456         if (ZeroRHS[M - NumElts])
2457           KnownZero.setBit(i);
2458       }
2459     }
2460     break;
2461   }
2462   case ISD::ANY_EXTEND_VECTOR_INREG:
2463   case ISD::SIGN_EXTEND_VECTOR_INREG:
2464   case ISD::ZERO_EXTEND_VECTOR_INREG: {
2465     APInt SrcUndef, SrcZero;
2466     SDValue Src = Op.getOperand(0);
2467     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2468     APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
2469     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2470                                    Depth + 1))
2471       return true;
2472     KnownZero = SrcZero.zextOrTrunc(NumElts);
2473     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
2474 
2475     if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
2476         Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
2477         DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
2478       // aext - if we just need the bottom element then we can bitcast.
2479       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2480     }
2481 
2482     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
2483       // zext(undef) upper bits are guaranteed to be zero.
2484       if (DemandedElts.isSubsetOf(KnownUndef))
2485         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2486       KnownUndef.clearAllBits();
2487     }
2488     break;
2489   }
2490 
2491   // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
2492   // MAX, saturated math, etc.
2493   case ISD::OR:
2494   case ISD::XOR:
2495   case ISD::ADD:
2496   case ISD::SUB:
2497   case ISD::FADD:
2498   case ISD::FSUB:
2499   case ISD::FMUL:
2500   case ISD::FDIV:
2501   case ISD::FREM: {
2502     APInt UndefRHS, ZeroRHS;
2503     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
2504                                    ZeroRHS, TLO, Depth + 1))
2505       return true;
2506     APInt UndefLHS, ZeroLHS;
2507     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
2508                                    ZeroLHS, TLO, Depth + 1))
2509       return true;
2510 
2511     KnownZero = ZeroLHS & ZeroRHS;
2512     KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
2513     break;
2514   }
2515   case ISD::SHL:
2516   case ISD::SRL:
2517   case ISD::SRA:
2518   case ISD::ROTL:
2519   case ISD::ROTR: {
2520     APInt UndefRHS, ZeroRHS;
2521     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
2522                                    ZeroRHS, TLO, Depth + 1))
2523       return true;
2524     APInt UndefLHS, ZeroLHS;
2525     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
2526                                    ZeroLHS, TLO, Depth + 1))
2527       return true;
2528 
2529     KnownZero = ZeroLHS;
2530     KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
2531     break;
2532   }
2533   case ISD::MUL:
2534   case ISD::AND: {
2535     APInt SrcUndef, SrcZero;
2536     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
2537                                    SrcZero, TLO, Depth + 1))
2538       return true;
2539     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2540                                    KnownZero, TLO, Depth + 1))
2541       return true;
2542 
2543     // If either side has a zero element, then the result element is zero, even
2544     // if the other is an UNDEF.
2545     // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
2546     // and then handle 'and' nodes with the rest of the binop opcodes.
2547     KnownZero |= SrcZero;
2548     KnownUndef &= SrcUndef;
2549     KnownUndef &= ~KnownZero;
2550     break;
2551   }
2552   case ISD::TRUNCATE:
2553   case ISD::SIGN_EXTEND:
2554   case ISD::ZERO_EXTEND:
2555     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2556                                    KnownZero, TLO, Depth + 1))
2557       return true;
2558 
2559     if (Op.getOpcode() == ISD::ZERO_EXTEND) {
2560       // zext(undef) upper bits are guaranteed to be zero.
2561       if (DemandedElts.isSubsetOf(KnownUndef))
2562         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2563       KnownUndef.clearAllBits();
2564     }
2565     break;
2566   default: {
2567     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2568       if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
2569                                                   KnownZero, TLO, Depth))
2570         return true;
2571     } else {
2572       KnownBits Known;
2573       APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
2574       if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
2575                                TLO, Depth, AssumeSingleUse))
2576         return true;
2577     }
2578     break;
2579   }
2580   }
2581   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
2582 
2583   // Constant fold all undef cases.
2584   // TODO: Handle zero cases as well.
2585   if (DemandedElts.isSubsetOf(KnownUndef))
2586     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2587 
2588   return false;
2589 }
2590 
2591 /// Determine which of the bits specified in Mask are known to be either zero or
2592 /// one and return them in the Known.
2593 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2594                                                    KnownBits &Known,
2595                                                    const APInt &DemandedElts,
2596                                                    const SelectionDAG &DAG,
2597                                                    unsigned Depth) const {
2598   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2599           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2600           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2601           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2602          "Should use MaskedValueIsZero if you don't know whether Op"
2603          " is a target node!");
2604   Known.resetAll();
2605 }
2606 
// Default GlobalISel hook: targets that expose known-bits information for
// their own generic MIR instructions override this. The base implementation
// conservatively reports that nothing is known about any bit of R.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  Known.resetAll();
}
2613 
2614 void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
2615                                                    KnownBits &Known,
2616                                                    const APInt &DemandedElts,
2617                                                    const SelectionDAG &DAG,
2618                                                    unsigned Depth) const {
2619   assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
2620 
2621   if (unsigned Align = DAG.InferPtrAlignment(Op)) {
2622     // The low bits are known zero if the pointer is aligned.
2623     Known.Zero.setLowBits(Log2_32(Align));
2624   }
2625 }
2626 
2627 /// This method can be implemented by targets that want to expose additional
2628 /// information about sign bits to the DAG Combiner.
2629 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2630                                                          const APInt &,
2631                                                          const SelectionDAG &,
2632                                                          unsigned Depth) const {
2633   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2634           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2635           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2636           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2637          "Should use ComputeNumSignBits if you don't know whether Op"
2638          " is a target node!");
2639   return 1;
2640 }
2641 
2642 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
2643     SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
2644     TargetLoweringOpt &TLO, unsigned Depth) const {
2645   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2646           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2647           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2648           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2649          "Should use SimplifyDemandedVectorElts if you don't know whether Op"
2650          " is a target node!");
2651   return false;
2652 }
2653 
2654 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
2655     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2656     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
2657   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2658           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2659           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2660           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2661          "Should use SimplifyDemandedBits if you don't know whether Op"
2662          " is a target node!");
2663   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
2664   return false;
2665 }
2666 
2667 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
2668     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2669     SelectionDAG &DAG, unsigned Depth) const {
2670   assert(
2671       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2672        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2673        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2674        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2675       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
2676       " is a target node!");
2677   return SDValue();
2678 }
2679 
2680 SDValue
2681 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
2682                                         SDValue N1, MutableArrayRef<int> Mask,
2683                                         SelectionDAG &DAG) const {
2684   bool LegalMask = isShuffleMaskLegal(Mask, VT);
2685   if (!LegalMask) {
2686     std::swap(N0, N1);
2687     ShuffleVectorSDNode::commuteMask(Mask);
2688     LegalMask = isShuffleMaskLegal(Mask, VT);
2689   }
2690 
2691   if (!LegalMask)
2692     return SDValue();
2693 
2694   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
2695 }
2696 
// Targets can override this to report the Constant value that a given load
// reads (e.g. from a target constant pool). The generic implementation does
// not know of any such constants.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
2700 
2701 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
2702                                                   const SelectionDAG &DAG,
2703                                                   bool SNaN,
2704                                                   unsigned Depth) const {
2705   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2706           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2707           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2708           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2709          "Should use isKnownNeverNaN if you don't know whether Op"
2710          " is a target node!");
2711   return false;
2712 }
2713 
2714 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
2715 // work with truncating build vectors and vectors with elements of less than
2716 // 8 bits.
2717 bool TargetLowering::isConstTrueVal(const SDNode *N) const {
2718   if (!N)
2719     return false;
2720 
2721   APInt CVal;
2722   if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2723     CVal = CN->getAPIntValue();
2724   } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
2725     auto *CN = BV->getConstantSplatNode();
2726     if (!CN)
2727       return false;
2728 
2729     // If this is a truncating build vector, truncate the splat value.
2730     // Otherwise, we may fail to match the expected values below.
2731     unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
2732     CVal = CN->getAPIntValue();
2733     if (BVEltWidth < CVal.getBitWidth())
2734       CVal = CVal.trunc(BVEltWidth);
2735   } else {
2736     return false;
2737   }
2738 
2739   switch (getBooleanContents(N->getValueType(0))) {
2740   case UndefinedBooleanContent:
2741     return CVal[0];
2742   case ZeroOrOneBooleanContent:
2743     return CVal.isOneValue();
2744   case ZeroOrNegativeOneBooleanContent:
2745     return CVal.isAllOnesValue();
2746   }
2747 
2748   llvm_unreachable("Invalid boolean contents");
2749 }
2750 
2751 bool TargetLowering::isConstFalseVal(const SDNode *N) const {
2752   if (!N)
2753     return false;
2754 
2755   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
2756   if (!CN) {
2757     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
2758     if (!BV)
2759       return false;
2760 
2761     // Only interested in constant splats, we don't care about undef
2762     // elements in identifying boolean constants and getConstantSplatNode
2763     // returns NULL if all ops are undef;
2764     CN = BV->getConstantSplatNode();
2765     if (!CN)
2766       return false;
2767   }
2768 
2769   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
2770     return !CN->getAPIntValue()[0];
2771 
2772   return CN->isNullValue();
2773 }
2774 
2775 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
2776                                        bool SExt) const {
2777   if (VT == MVT::i1)
2778     return N->isOne();
2779 
2780   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
2781   switch (Cnt) {
2782   case TargetLowering::ZeroOrOneBooleanContent:
2783     // An extended value of 1 is always true, unless its original type is i1,
2784     // in which case it will be sign extended to -1.
2785     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
2786   case TargetLowering::UndefinedBooleanContent:
2787   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2788     return N->isAllOnesValue() && SExt;
2789   }
2790   llvm_unreachable("Unexpected enumeration.");
2791 }
2792 
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  // Canonicalize so that the AND (if any) ends up on the LHS (N0).
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  EVT OpVT = N0.getValueType();
  // Only integer equality/inequality comparisons against an AND are handled.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // Identify which AND operand is the repeated value Y (i.e. equals N1);
  // the other operand becomes X. If neither matches, the pattern fails.
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  SelectionDAG &DAG = DCI.DAG;
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    // Only emit the new comparison if its condition code is (or will become)
    // legal; otherwise keep the original node.
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    auto *YConst = dyn_cast<ConstantSDNode>(Y);
    if (YConst && YConst->isNullValue())
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
2853 
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned comparison onto an eq/ne check, canonicalizing the
  // constant so that it is an exclusive bound (a pure power of two).
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  // The pattern only encodes a signed-truncation check when the two constants
  // are adjacent powers of two: I01 == I1 / 2.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into:  ((%x << C) a>> C) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);

  return T2;
}
2951 
2952 // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
2953 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
2954     EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
2955     DAGCombinerInfo &DCI, const SDLoc &DL) const {
2956   assert(isConstOrConstSplat(N1C) &&
2957          isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
2958          "Should be a comparison with 0.");
2959   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2960          "Valid only for [in]equality comparisons.");
2961 
2962   unsigned NewShiftOpcode;
2963   SDValue X, C, Y;
2964 
2965   SelectionDAG &DAG = DCI.DAG;
2966   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2967 
2968   // Look for '(C l>>/<< Y)'.
2969   auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
2970     // The shift should be one-use.
2971     if (!V.hasOneUse())
2972       return false;
2973     unsigned OldShiftOpcode = V.getOpcode();
2974     switch (OldShiftOpcode) {
2975     case ISD::SHL:
2976       NewShiftOpcode = ISD::SRL;
2977       break;
2978     case ISD::SRL:
2979       NewShiftOpcode = ISD::SHL;
2980       break;
2981     default:
2982       return false; // must be a logical shift.
2983     }
2984     // We should be shifting a constant.
2985     // FIXME: best to use isConstantOrConstantVector().
2986     C = V.getOperand(0);
2987     ConstantSDNode *CC =
2988         isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
2989     if (!CC)
2990       return false;
2991     Y = V.getOperand(1);
2992 
2993     ConstantSDNode *XC =
2994         isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
2995     return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
2996         X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
2997   };
2998 
2999   // LHS of comparison should be an one-use 'and'.
3000   if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3001     return SDValue();
3002 
3003   X = N0.getOperand(0);
3004   SDValue Mask = N0.getOperand(1);
3005 
3006   // 'and' is commutative!
3007   if (!Match(Mask)) {
3008     std::swap(X, Mask);
3009     if (!Match(Mask))
3010       return SDValue();
3011   }
3012 
3013   EVT VT = X.getValueType();
3014 
3015   // Produce:
3016   // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3017   SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3018   SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3019   SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3020   return T2;
3021 }
3022 
3023 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3024 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3025 /// handle the commuted versions of these patterns.
3026 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3027                                            ISD::CondCode Cond, const SDLoc &DL,
3028                                            DAGCombinerInfo &DCI) const {
3029   unsigned BOpcode = N0.getOpcode();
3030   assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3031          "Unexpected binop");
3032   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3033 
3034   // (X + Y) == X --> Y == 0
3035   // (X - Y) == X --> Y == 0
3036   // (X ^ Y) == X --> Y == 0
3037   SelectionDAG &DAG = DCI.DAG;
3038   EVT OpVT = N0.getValueType();
3039   SDValue X = N0.getOperand(0);
3040   SDValue Y = N0.getOperand(1);
3041   if (X == N1)
3042     return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3043 
3044   if (Y != N1)
3045     return SDValue();
3046 
3047   // (X + Y) == Y --> X == 0
3048   // (X ^ Y) == Y --> X == 0
3049   if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3050     return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3051 
3052   // The shift would not be valid if the operands are boolean (i1).
3053   if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3054     return SDValue();
3055 
3056   // (X - Y) == Y --> X == Y << 1
3057   EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3058                                  !DCI.isBeforeLegalize());
3059   SDValue One = DAG.getConstant(1, DL, ShiftVT);
3060   SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3061   if (!DCI.isCalledByLegalizer())
3062     DCI.AddToWorklist(YShl1.getNode());
3063   return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3064 }
3065 
3066 /// Try to simplify a setcc built with the specified operands and cc. If it is
3067 /// unable to simplify it, return a null SDValue.
3068 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
3069                                       ISD::CondCode Cond, bool foldBooleans,
3070                                       DAGCombinerInfo &DCI,
3071                                       const SDLoc &dl) const {
3072   SelectionDAG &DAG = DCI.DAG;
3073   const DataLayout &Layout = DAG.getDataLayout();
3074   EVT OpVT = N0.getValueType();
3075 
3076   // Constant fold or commute setcc.
3077   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
3078     return Fold;
3079 
3080   // Ensure that the constant occurs on the RHS and fold constant comparisons.
3081   // TODO: Handle non-splat vector constants. All undef causes trouble.
3082   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
3083   if (isConstOrConstSplat(N0) &&
3084       (DCI.isBeforeLegalizeOps() ||
3085        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
3086     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3087 
3088   // If we have a subtract with the same 2 non-constant operands as this setcc
3089   // -- but in reverse order -- then try to commute the operands of this setcc
3090   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
3091   // instruction on some targets.
3092   if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
3093       (DCI.isBeforeLegalizeOps() ||
3094        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
3095       DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
3096       !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
3097     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3098 
3099   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3100     const APInt &C1 = N1C->getAPIntValue();
3101 
3102     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
3103     // equality comparison, then we're just comparing whether X itself is
3104     // zero.
3105     if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
3106         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
3107         N0.getOperand(1).getOpcode() == ISD::Constant) {
3108       const APInt &ShAmt = N0.getConstantOperandAPInt(1);
3109       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3110           ShAmt == Log2_32(N0.getValueSizeInBits())) {
3111         if ((C1 == 0) == (Cond == ISD::SETEQ)) {
3112           // (srl (ctlz x), 5) == 0  -> X != 0
3113           // (srl (ctlz x), 5) != 1  -> X != 0
3114           Cond = ISD::SETNE;
3115         } else {
3116           // (srl (ctlz x), 5) != 0  -> X == 0
3117           // (srl (ctlz x), 5) == 1  -> X == 0
3118           Cond = ISD::SETEQ;
3119         }
3120         SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
3121         return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
3122                             Zero, Cond);
3123       }
3124     }
3125 
3126     SDValue CTPOP = N0;
3127     // Look through truncs that don't change the value of a ctpop.
3128     if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
3129       CTPOP = N0.getOperand(0);
3130 
3131     if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
3132         (N0 == CTPOP ||
3133          N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
3134       EVT CTVT = CTPOP.getValueType();
3135       SDValue CTOp = CTPOP.getOperand(0);
3136 
3137       // (ctpop x) u< 2 -> (x & x-1) == 0
3138       // (ctpop x) u> 1 -> (x & x-1) != 0
3139       if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
3140         SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3141         SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3142         SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3143         ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
3144         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
3145       }
3146 
3147       // If ctpop is not supported, expand a power-of-2 comparison based on it.
3148       if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
3149           (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3150         // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
3151         // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
3152         SDValue Zero = DAG.getConstant(0, dl, CTVT);
3153         SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3154         assert(CTVT.isInteger());
3155         ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
3156         SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3157         SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3158         SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
3159         SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
3160         unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
3161         return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
3162       }
3163     }
3164 
3165     // (zext x) == C --> x == (trunc C)
3166     // (sext x) == C --> x == (trunc C)
3167     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3168         DCI.isBeforeLegalize() && N0->hasOneUse()) {
3169       unsigned MinBits = N0.getValueSizeInBits();
3170       SDValue PreExt;
3171       bool Signed = false;
3172       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
3173         // ZExt
3174         MinBits = N0->getOperand(0).getValueSizeInBits();
3175         PreExt = N0->getOperand(0);
3176       } else if (N0->getOpcode() == ISD::AND) {
3177         // DAGCombine turns costly ZExts into ANDs
3178         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
3179           if ((C->getAPIntValue()+1).isPowerOf2()) {
3180             MinBits = C->getAPIntValue().countTrailingOnes();
3181             PreExt = N0->getOperand(0);
3182           }
3183       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
3184         // SExt
3185         MinBits = N0->getOperand(0).getValueSizeInBits();
3186         PreExt = N0->getOperand(0);
3187         Signed = true;
3188       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
3189         // ZEXTLOAD / SEXTLOAD
3190         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
3191           MinBits = LN0->getMemoryVT().getSizeInBits();
3192           PreExt = N0;
3193         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
3194           Signed = true;
3195           MinBits = LN0->getMemoryVT().getSizeInBits();
3196           PreExt = N0;
3197         }
3198       }
3199 
3200       // Figure out how many bits we need to preserve this constant.
3201       unsigned ReqdBits = Signed ?
3202         C1.getBitWidth() - C1.getNumSignBits() + 1 :
3203         C1.getActiveBits();
3204 
3205       // Make sure we're not losing bits from the constant.
3206       if (MinBits > 0 &&
3207           MinBits < C1.getBitWidth() &&
3208           MinBits >= ReqdBits) {
3209         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
3210         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
3211           // Will get folded away.
3212           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
3213           if (MinBits == 1 && C1 == 1)
3214             // Invert the condition.
3215             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
3216                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3217           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
3218           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
3219         }
3220 
3221         // If truncating the setcc operands is not desirable, we can still
3222         // simplify the expression in some cases:
3223         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
3224         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
3225         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
3226         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
3227         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
3228         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
3229         SDValue TopSetCC = N0->getOperand(0);
3230         unsigned N0Opc = N0->getOpcode();
3231         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
3232         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
3233             TopSetCC.getOpcode() == ISD::SETCC &&
3234             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
3235             (isConstFalseVal(N1C) ||
3236              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
3237 
3238           bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
3239                          (!N1C->isNullValue() && Cond == ISD::SETNE);
3240 
3241           if (!Inverse)
3242             return TopSetCC;
3243 
3244           ISD::CondCode InvCond = ISD::getSetCCInverse(
3245               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
3246               TopSetCC.getOperand(0).getValueType());
3247           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
3248                                       TopSetCC.getOperand(1),
3249                                       InvCond);
3250         }
3251       }
3252     }
3253 
3254     // If the LHS is '(and load, const)', the RHS is 0, the test is for
3255     // equality or unsigned, and all 1 bits of the const are in the same
3256     // partial word, see if we can shorten the load.
3257     if (DCI.isBeforeLegalize() &&
3258         !ISD::isSignedIntSetCC(Cond) &&
3259         N0.getOpcode() == ISD::AND && C1 == 0 &&
3260         N0.getNode()->hasOneUse() &&
3261         isa<LoadSDNode>(N0.getOperand(0)) &&
3262         N0.getOperand(0).getNode()->hasOneUse() &&
3263         isa<ConstantSDNode>(N0.getOperand(1))) {
3264       LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
3265       APInt bestMask;
3266       unsigned bestWidth = 0, bestOffset = 0;
3267       if (Lod->isSimple() && Lod->isUnindexed()) {
3268         unsigned origWidth = N0.getValueSizeInBits();
3269         unsigned maskWidth = origWidth;
3270         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
3271         // 8 bits, but have to be careful...
3272         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
3273           origWidth = Lod->getMemoryVT().getSizeInBits();
3274         const APInt &Mask = N0.getConstantOperandAPInt(1);
3275         for (unsigned width = origWidth / 2; width>=8; width /= 2) {
3276           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
3277           for (unsigned offset=0; offset<origWidth/width; offset++) {
3278             if (Mask.isSubsetOf(newMask)) {
3279               if (Layout.isLittleEndian())
3280                 bestOffset = (uint64_t)offset * (width/8);
3281               else
3282                 bestOffset = (origWidth/width - offset - 1) * (width/8);
3283               bestMask = Mask.lshr(offset * (width/8) * 8);
3284               bestWidth = width;
3285               break;
3286             }
3287             newMask <<= width;
3288           }
3289         }
3290       }
3291       if (bestWidth) {
3292         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
3293         if (newVT.isRound() &&
3294             shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
3295           SDValue Ptr = Lod->getBasePtr();
3296           if (bestOffset != 0)
3297             Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl);
3298           unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
3299           SDValue NewLoad = DAG.getLoad(
3300               newVT, dl, Lod->getChain(), Ptr,
3301               Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
3302           return DAG.getSetCC(dl, VT,
3303                               DAG.getNode(ISD::AND, dl, newVT, NewLoad,
3304                                       DAG.getConstant(bestMask.trunc(bestWidth),
3305                                                       dl, newVT)),
3306                               DAG.getConstant(0LL, dl, newVT), Cond);
3307         }
3308       }
3309     }
3310 
3311     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
3312     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
3313       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
3314 
3315       // If the comparison constant has bits in the upper part, the
3316       // zero-extended value could never match.
3317       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
3318                                               C1.getBitWidth() - InSize))) {
3319         switch (Cond) {
3320         case ISD::SETUGT:
3321         case ISD::SETUGE:
3322         case ISD::SETEQ:
3323           return DAG.getConstant(0, dl, VT);
3324         case ISD::SETULT:
3325         case ISD::SETULE:
3326         case ISD::SETNE:
3327           return DAG.getConstant(1, dl, VT);
3328         case ISD::SETGT:
3329         case ISD::SETGE:
3330           // True if the sign bit of C1 is set.
3331           return DAG.getConstant(C1.isNegative(), dl, VT);
3332         case ISD::SETLT:
3333         case ISD::SETLE:
3334           // True if the sign bit of C1 isn't set.
3335           return DAG.getConstant(C1.isNonNegative(), dl, VT);
3336         default:
3337           break;
3338         }
3339       }
3340 
3341       // Otherwise, we can perform the comparison with the low bits.
3342       switch (Cond) {
3343       case ISD::SETEQ:
3344       case ISD::SETNE:
3345       case ISD::SETUGT:
3346       case ISD::SETUGE:
3347       case ISD::SETULT:
3348       case ISD::SETULE: {
3349         EVT newVT = N0.getOperand(0).getValueType();
3350         if (DCI.isBeforeLegalizeOps() ||
3351             (isOperationLegal(ISD::SETCC, newVT) &&
3352              isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
3353           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
3354           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
3355 
3356           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
3357                                           NewConst, Cond);
3358           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
3359         }
3360         break;
3361       }
3362       default:
3363         break; // todo, be more careful with signed comparisons
3364       }
3365     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3366                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3367       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
3368       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
3369       EVT ExtDstTy = N0.getValueType();
3370       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
3371 
3372       // If the constant doesn't fit into the number of bits for the source of
3373       // the sign extension, it is impossible for both sides to be equal.
3374       if (C1.getMinSignedBits() > ExtSrcTyBits)
3375         return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
3376 
3377       SDValue ZextOp;
3378       EVT Op0Ty = N0.getOperand(0).getValueType();
3379       if (Op0Ty == ExtSrcTy) {
3380         ZextOp = N0.getOperand(0);
3381       } else {
3382         APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
3383         ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
3384                              DAG.getConstant(Imm, dl, Op0Ty));
3385       }
3386       if (!DCI.isCalledByLegalizer())
3387         DCI.AddToWorklist(ZextOp.getNode());
3388       // Otherwise, make this a use of a zext.
3389       return DAG.getSetCC(dl, VT, ZextOp,
3390                           DAG.getConstant(C1 & APInt::getLowBitsSet(
3391                                                               ExtDstTyBits,
3392                                                               ExtSrcTyBits),
3393                                           dl, ExtDstTy),
3394                           Cond);
3395     } else if ((N1C->isNullValue() || N1C->isOne()) &&
3396                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3397       // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
3398       if (N0.getOpcode() == ISD::SETCC &&
3399           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
3400           (N0.getValueType() == MVT::i1 ||
3401            getBooleanContents(N0.getOperand(0).getValueType()) ==
3402                        ZeroOrOneBooleanContent)) {
3403         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
3404         if (TrueWhenTrue)
3405           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
3406         // Invert the condition.
3407         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
3408         CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
3409         if (DCI.isBeforeLegalizeOps() ||
3410             isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
3411           return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
3412       }
3413 
3414       if ((N0.getOpcode() == ISD::XOR ||
3415            (N0.getOpcode() == ISD::AND &&
3416             N0.getOperand(0).getOpcode() == ISD::XOR &&
3417             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
3418           isa<ConstantSDNode>(N0.getOperand(1)) &&
3419           cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
3420         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
3421         // can only do this if the top bits are known zero.
3422         unsigned BitWidth = N0.getValueSizeInBits();
3423         if (DAG.MaskedValueIsZero(N0,
3424                                   APInt::getHighBitsSet(BitWidth,
3425                                                         BitWidth-1))) {
3426           // Okay, get the un-inverted input value.
3427           SDValue Val;
3428           if (N0.getOpcode() == ISD::XOR) {
3429             Val = N0.getOperand(0);
3430           } else {
3431             assert(N0.getOpcode() == ISD::AND &&
3432                     N0.getOperand(0).getOpcode() == ISD::XOR);
3433             // ((X^1)&1)^1 -> X & 1
3434             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
3435                               N0.getOperand(0).getOperand(0),
3436                               N0.getOperand(1));
3437           }
3438 
3439           return DAG.getSetCC(dl, VT, Val, N1,
3440                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3441         }
3442       } else if (N1C->isOne()) {
3443         SDValue Op0 = N0;
3444         if (Op0.getOpcode() == ISD::TRUNCATE)
3445           Op0 = Op0.getOperand(0);
3446 
3447         if ((Op0.getOpcode() == ISD::XOR) &&
3448             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
3449             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
3450           SDValue XorLHS = Op0.getOperand(0);
3451           SDValue XorRHS = Op0.getOperand(1);
3452           // Ensure that the input setccs return an i1 type or 0/1 value.
3453           if (Op0.getValueType() == MVT::i1 ||
3454               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
3455                       ZeroOrOneBooleanContent &&
3456                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
3457                         ZeroOrOneBooleanContent)) {
3458             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
3459             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
3460             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
3461           }
3462         }
3463         if (Op0.getOpcode() == ISD::AND &&
3464             isa<ConstantSDNode>(Op0.getOperand(1)) &&
3465             cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
3466           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
3467           if (Op0.getValueType().bitsGT(VT))
3468             Op0 = DAG.getNode(ISD::AND, dl, VT,
3469                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
3470                           DAG.getConstant(1, dl, VT));
3471           else if (Op0.getValueType().bitsLT(VT))
3472             Op0 = DAG.getNode(ISD::AND, dl, VT,
3473                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
3474                         DAG.getConstant(1, dl, VT));
3475 
3476           return DAG.getSetCC(dl, VT, Op0,
3477                               DAG.getConstant(0, dl, Op0.getValueType()),
3478                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3479         }
3480         if (Op0.getOpcode() == ISD::AssertZext &&
3481             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
3482           return DAG.getSetCC(dl, VT, Op0,
3483                               DAG.getConstant(0, dl, Op0.getValueType()),
3484                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3485       }
3486     }
3487 
3488     // Given:
3489     //   icmp eq/ne (urem %x, %y), 0
3490     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
3491     //   icmp eq/ne %x, 0
3492     if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
3493         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3494       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
3495       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
3496       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
3497         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
3498     }
3499 
3500     if (SDValue V =
3501             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
3502       return V;
3503   }
3504 
3505   // These simplifications apply to splat vectors as well.
3506   // TODO: Handle more splat vector cases.
3507   if (auto *N1C = isConstOrConstSplat(N1)) {
3508     const APInt &C1 = N1C->getAPIntValue();
3509 
3510     APInt MinVal, MaxVal;
3511     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
3512     if (ISD::isSignedIntSetCC(Cond)) {
3513       MinVal = APInt::getSignedMinValue(OperandBitSize);
3514       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
3515     } else {
3516       MinVal = APInt::getMinValue(OperandBitSize);
3517       MaxVal = APInt::getMaxValue(OperandBitSize);
3518     }
3519 
3520     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
3521     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
3522       // X >= MIN --> true
3523       if (C1 == MinVal)
3524         return DAG.getBoolConstant(true, dl, VT, OpVT);
3525 
3526       if (!VT.isVector()) { // TODO: Support this for vectors.
3527         // X >= C0 --> X > (C0 - 1)
3528         APInt C = C1 - 1;
3529         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
3530         if ((DCI.isBeforeLegalizeOps() ||
3531              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3532             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3533                                   isLegalICmpImmediate(C.getSExtValue())))) {
3534           return DAG.getSetCC(dl, VT, N0,
3535                               DAG.getConstant(C, dl, N1.getValueType()),
3536                               NewCC);
3537         }
3538       }
3539     }
3540 
3541     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
3542       // X <= MAX --> true
3543       if (C1 == MaxVal)
3544         return DAG.getBoolConstant(true, dl, VT, OpVT);
3545 
3546       // X <= C0 --> X < (C0 + 1)
3547       if (!VT.isVector()) { // TODO: Support this for vectors.
3548         APInt C = C1 + 1;
3549         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
3550         if ((DCI.isBeforeLegalizeOps() ||
3551              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3552             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3553                                   isLegalICmpImmediate(C.getSExtValue())))) {
3554           return DAG.getSetCC(dl, VT, N0,
3555                               DAG.getConstant(C, dl, N1.getValueType()),
3556                               NewCC);
3557         }
3558       }
3559     }
3560 
3561     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
3562       if (C1 == MinVal)
3563         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
3564 
3565       // TODO: Support this for vectors after legalize ops.
3566       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3567         // Canonicalize setlt X, Max --> setne X, Max
3568         if (C1 == MaxVal)
3569           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3570 
3571         // If we have setult X, 1, turn it into seteq X, 0
3572         if (C1 == MinVal+1)
3573           return DAG.getSetCC(dl, VT, N0,
3574                               DAG.getConstant(MinVal, dl, N0.getValueType()),
3575                               ISD::SETEQ);
3576       }
3577     }
3578 
3579     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
3580       if (C1 == MaxVal)
3581         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
3582 
3583       // TODO: Support this for vectors after legalize ops.
3584       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3585         // Canonicalize setgt X, Min --> setne X, Min
3586         if (C1 == MinVal)
3587           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3588 
3589         // If we have setugt X, Max-1, turn it into seteq X, Max
3590         if (C1 == MaxVal-1)
3591           return DAG.getSetCC(dl, VT, N0,
3592                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
3593                               ISD::SETEQ);
3594       }
3595     }
3596 
3597     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
3598       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
3599       if (C1.isNullValue())
3600         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
3601                 VT, N0, N1, Cond, DCI, dl))
3602           return CC;
3603     }
3604 
3605     // If we have "setcc X, C0", check to see if we can shrink the immediate
3606     // by changing cc.
3607     // TODO: Support this for vectors after legalize ops.
3608     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3609       // SETUGT X, SINTMAX  -> SETLT X, 0
3610       if (Cond == ISD::SETUGT &&
3611           C1 == APInt::getSignedMaxValue(OperandBitSize))
3612         return DAG.getSetCC(dl, VT, N0,
3613                             DAG.getConstant(0, dl, N1.getValueType()),
3614                             ISD::SETLT);
3615 
3616       // SETULT X, SINTMIN  -> SETGT X, -1
3617       if (Cond == ISD::SETULT &&
3618           C1 == APInt::getSignedMinValue(OperandBitSize)) {
3619         SDValue ConstMinusOne =
3620             DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
3621                             N1.getValueType());
3622         return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
3623       }
3624     }
3625   }
3626 
3627   // Back to non-vector simplifications.
3628   // TODO: Can we do these for vector splats?
3629   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3630     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3631     const APInt &C1 = N1C->getAPIntValue();
3632     EVT ShValTy = N0.getValueType();
3633 
3634     // Fold bit comparisons when we can.
3635     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3636         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
3637         N0.getOpcode() == ISD::AND) {
3638       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3639         EVT ShiftTy =
3640             getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
3641         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
3642           // Perform the xform if the AND RHS is a single bit.
3643           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
3644           if (AndRHS->getAPIntValue().isPowerOf2() &&
3645               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
3646             return DAG.getNode(ISD::TRUNCATE, dl, VT,
3647                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3648                                            DAG.getConstant(ShCt, dl, ShiftTy)));
3649           }
3650         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
3651           // (X & 8) == 8  -->  (X & 8) >> 3
3652           // Perform the xform if C1 is a single bit.
3653           unsigned ShCt = C1.logBase2();
3654           if (C1.isPowerOf2() &&
3655               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
3656             return DAG.getNode(ISD::TRUNCATE, dl, VT,
3657                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3658                                            DAG.getConstant(ShCt, dl, ShiftTy)));
3659           }
3660         }
3661       }
3662     }
3663 
3664     if (C1.getMinSignedBits() <= 64 &&
3665         !isLegalICmpImmediate(C1.getSExtValue())) {
3666       EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
3667       // (X & -256) == 256 -> (X >> 8) == 1
3668       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3669           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
3670         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3671           const APInt &AndRHSC = AndRHS->getAPIntValue();
3672           if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
3673             unsigned ShiftBits = AndRHSC.countTrailingZeros();
3674             if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
3675               SDValue Shift =
3676                 DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
3677                             DAG.getConstant(ShiftBits, dl, ShiftTy));
3678               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
3679               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
3680             }
3681           }
3682         }
3683       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
3684                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
3685         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
3686         // X <  0x100000000 -> (X >> 32) <  1
3687         // X >= 0x100000000 -> (X >> 32) >= 1
3688         // X <= 0x0ffffffff -> (X >> 32) <  1
3689         // X >  0x0ffffffff -> (X >> 32) >= 1
3690         unsigned ShiftBits;
3691         APInt NewC = C1;
3692         ISD::CondCode NewCond = Cond;
3693         if (AdjOne) {
3694           ShiftBits = C1.countTrailingOnes();
3695           NewC = NewC + 1;
3696           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3697         } else {
3698           ShiftBits = C1.countTrailingZeros();
3699         }
3700         NewC.lshrInPlace(ShiftBits);
3701         if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
3702             isLegalICmpImmediate(NewC.getSExtValue()) &&
3703             !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
3704           SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3705                                       DAG.getConstant(ShiftBits, dl, ShiftTy));
3706           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
3707           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
3708         }
3709       }
3710     }
3711   }
3712 
3713   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
3714     auto *CFP = cast<ConstantFPSDNode>(N1);
3715     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
3716 
3717     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
3718     // constant if knowing that the operand is non-nan is enough.  We prefer to
3719     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
3720     // materialize 0.0.
3721     if (Cond == ISD::SETO || Cond == ISD::SETUO)
3722       return DAG.getSetCC(dl, VT, N0, N0, Cond);
3723 
3724     // setcc (fneg x), C -> setcc swap(pred) x, -C
3725     if (N0.getOpcode() == ISD::FNEG) {
3726       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
3727       if (DCI.isBeforeLegalizeOps() ||
3728           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
3729         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
3730         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
3731       }
3732     }
3733 
3734     // If the condition is not legal, see if we can find an equivalent one
3735     // which is legal.
3736     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
3737       // If the comparison was an awkward floating-point == or != and one of
3738       // the comparison operands is infinity or negative infinity, convert the
3739       // condition to a less-awkward <= or >=.
3740       if (CFP->getValueAPF().isInfinity()) {
3741         if (CFP->getValueAPF().isNegative()) {
3742           if (Cond == ISD::SETOEQ &&
3743               isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
3744             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
3745           if (Cond == ISD::SETUEQ &&
3746               isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
3747             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
3748           if (Cond == ISD::SETUNE &&
3749               isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
3750             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
3751           if (Cond == ISD::SETONE &&
3752               isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
3753             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
3754         } else {
3755           if (Cond == ISD::SETOEQ &&
3756               isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
3757             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
3758           if (Cond == ISD::SETUEQ &&
3759               isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
3760             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
3761           if (Cond == ISD::SETUNE &&
3762               isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
3763             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
3764           if (Cond == ISD::SETONE &&
3765               isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
3766             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
3767         }
3768       }
3769     }
3770   }
3771 
3772   if (N0 == N1) {
3773     // The sext(setcc()) => setcc() optimization relies on the appropriate
3774     // constant being emitted.
3775     assert(!N0.getValueType().isInteger() &&
3776            "Integer types should be handled by FoldSetCC");
3777 
3778     bool EqTrue = ISD::isTrueWhenEqual(Cond);
3779     unsigned UOF = ISD::getUnorderedFlavor(Cond);
3780     if (UOF == 2) // FP operators that are undefined on NaNs.
3781       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
3782     if (UOF == unsigned(EqTrue))
3783       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
3784     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
3785     // if it is not already.
3786     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
3787     if (NewCond != Cond &&
3788         (DCI.isBeforeLegalizeOps() ||
3789                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
3790       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
3791   }
3792 
3793   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3794       N0.getValueType().isInteger()) {
3795     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
3796         N0.getOpcode() == ISD::XOR) {
3797       // Simplify (X+Y) == (X+Z) -->  Y == Z
3798       if (N0.getOpcode() == N1.getOpcode()) {
3799         if (N0.getOperand(0) == N1.getOperand(0))
3800           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
3801         if (N0.getOperand(1) == N1.getOperand(1))
3802           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
3803         if (isCommutativeBinOp(N0.getOpcode())) {
3804           // If X op Y == Y op X, try other combinations.
3805           if (N0.getOperand(0) == N1.getOperand(1))
3806             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
3807                                 Cond);
3808           if (N0.getOperand(1) == N1.getOperand(0))
3809             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
3810                                 Cond);
3811         }
3812       }
3813 
3814       // If RHS is a legal immediate value for a compare instruction, we need
3815       // to be careful about increasing register pressure needlessly.
3816       bool LegalRHSImm = false;
3817 
3818       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
3819         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3820           // Turn (X+C1) == C2 --> X == C2-C1
3821           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
3822             return DAG.getSetCC(dl, VT, N0.getOperand(0),
3823                                 DAG.getConstant(RHSC->getAPIntValue()-
3824                                                 LHSR->getAPIntValue(),
3825                                 dl, N0.getValueType()), Cond);
3826           }
3827 
3828           // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
3829           if (N0.getOpcode() == ISD::XOR)
3830             // If we know that all of the inverted bits are zero, don't bother
3831             // performing the inversion.
3832             if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
3833               return
3834                 DAG.getSetCC(dl, VT, N0.getOperand(0),
3835                              DAG.getConstant(LHSR->getAPIntValue() ^
3836                                                RHSC->getAPIntValue(),
3837                                              dl, N0.getValueType()),
3838                              Cond);
3839         }
3840 
3841         // Turn (C1-X) == C2 --> X == C1-C2
3842         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
3843           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
3844             return
3845               DAG.getSetCC(dl, VT, N0.getOperand(1),
3846                            DAG.getConstant(SUBC->getAPIntValue() -
3847                                              RHSC->getAPIntValue(),
3848                                            dl, N0.getValueType()),
3849                            Cond);
3850           }
3851         }
3852 
3853         // Could RHSC fold directly into a compare?
3854         if (RHSC->getValueType(0).getSizeInBits() <= 64)
3855           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
3856       }
3857 
3858       // (X+Y) == X --> Y == 0 and similar folds.
3859       // Don't do this if X is an immediate that can fold into a cmp
3860       // instruction and X+Y has other uses. It could be an induction variable
3861       // chain, and the transform would increase register pressure.
3862       if (!LegalRHSImm || N0.hasOneUse())
3863         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
3864           return V;
3865     }
3866 
3867     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
3868         N1.getOpcode() == ISD::XOR)
3869       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
3870         return V;
3871 
3872     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
3873       return V;
3874   }
3875 
3876   // Fold remainder of division by a constant.
3877   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
3878       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3879     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3880 
3881     // When division is cheap or optimizing for minimum size,
3882     // fall through to DIVREM creation by skipping this fold.
3883     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
3884       if (N0.getOpcode() == ISD::UREM) {
3885         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
3886           return Folded;
3887       } else if (N0.getOpcode() == ISD::SREM) {
3888         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
3889           return Folded;
3890       }
3891     }
3892   }
3893 
3894   // Fold away ALL boolean setcc's.
3895   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
3896     SDValue Temp;
3897     switch (Cond) {
3898     default: llvm_unreachable("Unknown integer setcc!");
3899     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
3900       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
3901       N0 = DAG.getNOT(dl, Temp, OpVT);
3902       if (!DCI.isCalledByLegalizer())
3903         DCI.AddToWorklist(Temp.getNode());
3904       break;
3905     case ISD::SETNE:  // X != Y   -->  (X^Y)
3906       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
3907       break;
3908     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
3909     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
3910       Temp = DAG.getNOT(dl, N0, OpVT);
3911       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
3912       if (!DCI.isCalledByLegalizer())
3913         DCI.AddToWorklist(Temp.getNode());
3914       break;
3915     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
3916     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
3917       Temp = DAG.getNOT(dl, N1, OpVT);
3918       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
3919       if (!DCI.isCalledByLegalizer())
3920         DCI.AddToWorklist(Temp.getNode());
3921       break;
3922     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
3923     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
3924       Temp = DAG.getNOT(dl, N0, OpVT);
3925       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
3926       if (!DCI.isCalledByLegalizer())
3927         DCI.AddToWorklist(Temp.getNode());
3928       break;
3929     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
3930     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
3931       Temp = DAG.getNOT(dl, N1, OpVT);
3932       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
3933       break;
3934     }
3935     if (VT.getScalarType() != MVT::i1) {
3936       if (!DCI.isCalledByLegalizer())
3937         DCI.AddToWorklist(N0.getNode());
3938       // FIXME: If running after legalize, we probably can't do this.
3939       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
3940       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
3941     }
3942     return N0;
3943   }
3944 
3945   // Could not fold it.
3946   return SDValue();
3947 }
3948 
3949 /// Returns true (and the GlobalValue and the offset) if the node is a
3950 /// GlobalAddress + offset.
3951 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
3952                                     int64_t &Offset) const {
3953 
3954   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
3955 
3956   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
3957     GA = GASD->getGlobal();
3958     Offset += GASD->getOffset();
3959     return true;
3960   }
3961 
3962   if (N->getOpcode() == ISD::ADD) {
3963     SDValue N1 = N->getOperand(0);
3964     SDValue N2 = N->getOperand(1);
3965     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
3966       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
3967         Offset += V->getSExtValue();
3968         return true;
3969       }
3970     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
3971       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
3972         Offset += V->getSExtValue();
3973         return true;
3974       }
3975     }
3976   }
3977 
3978   return false;
3979 }
3980 
3981 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
3982                                           DAGCombinerInfo &DCI) const {
3983   // Default implementation: no optimization.
3984   return SDValue();
3985 }
3986 
3987 //===----------------------------------------------------------------------===//
3988 //  Inline Assembler Implementation Methods
3989 //===----------------------------------------------------------------------===//
3990 
3991 TargetLowering::ConstraintType
3992 TargetLowering::getConstraintType(StringRef Constraint) const {
3993   unsigned S = Constraint.size();
3994 
3995   if (S == 1) {
3996     switch (Constraint[0]) {
3997     default: break;
3998     case 'r':
3999       return C_RegisterClass;
4000     case 'm': // memory
4001     case 'o': // offsetable
4002     case 'V': // not offsetable
4003       return C_Memory;
4004     case 'n': // Simple Integer
4005     case 'E': // Floating Point Constant
4006     case 'F': // Floating Point Constant
4007       return C_Immediate;
4008     case 'i': // Simple Integer or Relocatable Constant
4009     case 's': // Relocatable Constant
4010     case 'p': // Address.
4011     case 'X': // Allow ANY value.
4012     case 'I': // Target registers.
4013     case 'J':
4014     case 'K':
4015     case 'L':
4016     case 'M':
4017     case 'N':
4018     case 'O':
4019     case 'P':
4020     case '<':
4021     case '>':
4022       return C_Other;
4023     }
4024   }
4025 
4026   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
4027     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
4028       return C_Memory;
4029     return C_Register;
4030   }
4031   return C_Unknown;
4032 }
4033 
4034 /// Try to replace an X constraint, which matches anything, with another that
4035 /// has more specific requirements based on the type of the corresponding
4036 /// operand.
4037 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4038   if (ConstraintVT.isInteger())
4039     return "r";
4040   if (ConstraintVT.isFloatingPoint())
4041     return "f"; // works for many targets
4042   return nullptr;
4043 }
4044 
4045 SDValue TargetLowering::LowerAsmOutputForConstraint(
4046     SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
4047     SelectionDAG &DAG) const {
4048   return SDValue();
4049 }
4050 
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled here; multi-letter codes are
  // left for target overrides.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':     // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock ||
        Op.getOpcode() == ISD::TargetBlockAddress) {
      Ops.push_back(Op);
      return;
    }
    LLVM_FALLTHROUGH;
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    GlobalAddressSDNode *GA;
    ConstantSDNode *C;
    BlockAddressSDNode *BA;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (1) {
      // A global address satisfies 'i', 's', and 'X', but not 'n'
      // (which demands a plain integer).
      if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0),
                                                 Offset + GA->getOffset()));
        return;
      } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
                 ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        // i1 constants are special-cased: they are extended according to the
        // target's boolean-contents convention instead of always being
        // sign extended.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
                                      : ISD::SIGN_EXTEND;
        int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
                                                    : C->getSExtValue();
        Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
                                            SDLoc(C), MVT::i64));
        return;
      } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
                 ConstraintLetter != 'n') {
        // Block addresses are relocatable, so they satisfy 'i'/'s'/'X'.
        Ops.push_back(DAG.getTargetBlockAddress(
            BA->getBlockAddress(), BA->getValueType(0),
            Offset + BA->getOffset(), BA->getTargetFlags()));
        return;
      } else {
        const unsigned OpCode = Op.getOpcode();
        if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
          // Peel one constant operand off the ADD/SUB, fold it into Offset,
          // and keep walking toward the symbol.
          if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
            Op = Op.getOperand(1);
          // Subtraction is not commutative.
          else if (OpCode == ISD::ADD &&
                   (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
            Op = Op.getOperand(0);
          else
            return;
          // Negate the constant when it was the right operand of a SUB.
          Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
          continue;
        }
      }
      return;
    }
    break;
  }
  }
}
4131 
4132 std::pair<unsigned, const TargetRegisterClass *>
4133 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
4134                                              StringRef Constraint,
4135                                              MVT VT) const {
4136   if (Constraint.empty() || Constraint[0] != '{')
4137     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
4138   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
4139 
4140   // Remove the braces from around the name.
4141   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
4142 
4143   std::pair<unsigned, const TargetRegisterClass *> R =
4144       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
4145 
4146   // Figure out which register class contains this reg.
4147   for (const TargetRegisterClass *RC : RI->regclasses()) {
4148     // If none of the value types for this register class are valid, we
4149     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
4150     if (!isLegalRC(*RI, *RC))
4151       continue;
4152 
4153     for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
4154          I != E; ++I) {
4155       if (RegName.equals_lower(RI->getRegAsmName(*I))) {
4156         std::pair<unsigned, const TargetRegisterClass *> S =
4157             std::make_pair(*I, RC);
4158 
4159         // If this register class has the requested value type, return it,
4160         // otherwise keep searching and return the first class found
4161         // if no other is found which explicitly has the requested type.
4162         if (RI->isTypeLegalForClass(*RC, VT))
4163           return S;
4164         if (!R.second)
4165           R = S;
4166       }
4167     }
4168   }
4169 
4170   return R;
4171 }
4172 
4173 //===----------------------------------------------------------------------===//
4174 // Constraint Selection.
4175 
4176 /// Return true of this is an input operand that is a matching constraint like
4177 /// "4".
4178 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
4179   assert(!ConstraintCode.empty() && "No known constraint!");
4180   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
4181 }
4182 
4183 /// If this is an input matching constraint, this method returns the output
4184 /// operand it matches.
4185 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
4186   assert(!ConstraintCode.empty() && "No known constraint!");
4187   return atoi(ConstraintCode.c_str());
4188 }
4189 
4190 /// Split up the constraint string from the inline assembly value into the
4191 /// specific constraints and their prefixes, and also tie in the associated
4192 /// operand values.
4193 /// If this returns an empty vector, and if the constraint string itself
4194 /// isn't empty, there was an error parsing.
4195 TargetLowering::AsmOperandInfoVector
4196 TargetLowering::ParseConstraints(const DataLayout &DL,
4197                                  const TargetRegisterInfo *TRI,
4198                                  ImmutableCallSite CS) const {
4199   /// Information about all of the constraints.
4200   AsmOperandInfoVector ConstraintOperands;
4201   const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
4202   unsigned maCount = 0; // Largest number of multiple alternative constraints.
4203 
4204   // Do a prepass over the constraints, canonicalizing them, and building up the
4205   // ConstraintOperands list.
4206   unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
4207   unsigned ResNo = 0; // ResNo - The result number of the next output.
4208 
4209   for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
4210     ConstraintOperands.emplace_back(std::move(CI));
4211     AsmOperandInfo &OpInfo = ConstraintOperands.back();
4212 
4213     // Update multiple alternative constraint count.
4214     if (OpInfo.multipleAlternatives.size() > maCount)
4215       maCount = OpInfo.multipleAlternatives.size();
4216 
4217     OpInfo.ConstraintVT = MVT::Other;
4218 
4219     // Compute the value type for each operand.
4220     switch (OpInfo.Type) {
4221     case InlineAsm::isOutput:
4222       // Indirect outputs just consume an argument.
4223       if (OpInfo.isIndirect) {
4224         OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
4225         break;
4226       }
4227 
4228       // The return value of the call is this value.  As such, there is no
4229       // corresponding argument.
4230       assert(!CS.getType()->isVoidTy() &&
4231              "Bad inline asm!");
4232       if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
4233         OpInfo.ConstraintVT =
4234             getSimpleValueType(DL, STy->getElementType(ResNo));
4235       } else {
4236         assert(ResNo == 0 && "Asm only has one result!");
4237         OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
4238       }
4239       ++ResNo;
4240       break;
4241     case InlineAsm::isInput:
4242       OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
4243       break;
4244     case InlineAsm::isClobber:
4245       // Nothing to do.
4246       break;
4247     }
4248 
4249     if (OpInfo.CallOperandVal) {
4250       llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
4251       if (OpInfo.isIndirect) {
4252         llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
4253         if (!PtrTy)
4254           report_fatal_error("Indirect operand for inline asm not a pointer!");
4255         OpTy = PtrTy->getElementType();
4256       }
4257 
4258       // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
4259       if (StructType *STy = dyn_cast<StructType>(OpTy))
4260         if (STy->getNumElements() == 1)
4261           OpTy = STy->getElementType(0);
4262 
4263       // If OpTy is not a single value, it may be a struct/union that we
4264       // can tile with integers.
4265       if (!OpTy->isSingleValueType() && OpTy->isSized()) {
4266         unsigned BitSize = DL.getTypeSizeInBits(OpTy);
4267         switch (BitSize) {
4268         default: break;
4269         case 1:
4270         case 8:
4271         case 16:
4272         case 32:
4273         case 64:
4274         case 128:
4275           OpInfo.ConstraintVT =
4276               MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
4277           break;
4278         }
4279       } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
4280         unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
4281         OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
4282       } else {
4283         OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
4284       }
4285     }
4286   }
4287 
4288   // If we have multiple alternative constraints, select the best alternative.
4289   if (!ConstraintOperands.empty()) {
4290     if (maCount) {
4291       unsigned bestMAIndex = 0;
4292       int bestWeight = -1;
4293       // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
4294       int weight = -1;
4295       unsigned maIndex;
4296       // Compute the sums of the weights for each alternative, keeping track
4297       // of the best (highest weight) one so far.
4298       for (maIndex = 0; maIndex < maCount; ++maIndex) {
4299         int weightSum = 0;
4300         for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4301              cIndex != eIndex; ++cIndex) {
4302           AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
4303           if (OpInfo.Type == InlineAsm::isClobber)
4304             continue;
4305 
4306           // If this is an output operand with a matching input operand,
4307           // look up the matching input. If their types mismatch, e.g. one
4308           // is an integer, the other is floating point, or their sizes are
4309           // different, flag it as an maCantMatch.
4310           if (OpInfo.hasMatchingInput()) {
4311             AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4312             if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4313               if ((OpInfo.ConstraintVT.isInteger() !=
4314                    Input.ConstraintVT.isInteger()) ||
4315                   (OpInfo.ConstraintVT.getSizeInBits() !=
4316                    Input.ConstraintVT.getSizeInBits())) {
4317                 weightSum = -1; // Can't match.
4318                 break;
4319               }
4320             }
4321           }
4322           weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
4323           if (weight == -1) {
4324             weightSum = -1;
4325             break;
4326           }
4327           weightSum += weight;
4328         }
4329         // Update best.
4330         if (weightSum > bestWeight) {
4331           bestWeight = weightSum;
4332           bestMAIndex = maIndex;
4333         }
4334       }
4335 
4336       // Now select chosen alternative in each constraint.
4337       for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4338            cIndex != eIndex; ++cIndex) {
4339         AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
4340         if (cInfo.Type == InlineAsm::isClobber)
4341           continue;
4342         cInfo.selectAlternative(bestMAIndex);
4343       }
4344     }
4345   }
4346 
4347   // Check and hook up tied operands, choose constraint code to use.
4348   for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4349        cIndex != eIndex; ++cIndex) {
4350     AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
4351 
4352     // If this is an output operand with a matching input operand, look up the
4353     // matching input. If their types mismatch, e.g. one is an integer, the
4354     // other is floating point, or their sizes are different, flag it as an
4355     // error.
4356     if (OpInfo.hasMatchingInput()) {
4357       AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4358 
4359       if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4360         std::pair<unsigned, const TargetRegisterClass *> MatchRC =
4361             getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
4362                                          OpInfo.ConstraintVT);
4363         std::pair<unsigned, const TargetRegisterClass *> InputRC =
4364             getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
4365                                          Input.ConstraintVT);
4366         if ((OpInfo.ConstraintVT.isInteger() !=
4367              Input.ConstraintVT.isInteger()) ||
4368             (MatchRC.second != InputRC.second)) {
4369           report_fatal_error("Unsupported asm: input constraint"
4370                              " with a matching output constraint of"
4371                              " incompatible type!");
4372         }
4373       }
4374     }
4375   }
4376 
4377   return ConstraintOperands;
4378 }
4379 
/// Return an integer indicating how general CT is.
/// Higher values are more general: immediates / unknown (0) < a single
/// specific register (1) < a register class (2) < memory (3).  Used by
/// ChooseConstraint below to prefer the most general alternative.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
  switch (CT) {
  case TargetLowering::C_Immediate:
  case TargetLowering::C_Other:
  case TargetLowering::C_Unknown:
    return 0;
  case TargetLowering::C_Register:
    return 1;
  case TargetLowering::C_RegisterClass:
    return 2;
  case TargetLowering::C_Memory:
    return 3;
  }
  llvm_unreachable("Invalid constraint type");
}
4396 
4397 /// Examine constraint type and operand type and determine a weight value.
4398 /// This object must already have been set up with the operand type
4399 /// and the current alternative constraint selected.
4400 TargetLowering::ConstraintWeight
4401   TargetLowering::getMultipleConstraintMatchWeight(
4402     AsmOperandInfo &info, int maIndex) const {
4403   InlineAsm::ConstraintCodeVector *rCodes;
4404   if (maIndex >= (int)info.multipleAlternatives.size())
4405     rCodes = &info.Codes;
4406   else
4407     rCodes = &info.multipleAlternatives[maIndex].Codes;
4408   ConstraintWeight BestWeight = CW_Invalid;
4409 
4410   // Loop over the options, keeping track of the most general one.
4411   for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
4412     ConstraintWeight weight =
4413       getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
4414     if (weight > BestWeight)
4415       BestWeight = weight;
4416   }
4417 
4418   return BestWeight;
4419 }
4420 
4421 /// Examine constraint type and operand type and determine a weight value.
4422 /// This object must already have been set up with the operand type
4423 /// and the current alternative constraint selected.
4424 TargetLowering::ConstraintWeight
4425   TargetLowering::getSingleConstraintMatchWeight(
4426     AsmOperandInfo &info, const char *constraint) const {
4427   ConstraintWeight weight = CW_Invalid;
4428   Value *CallOperandVal = info.CallOperandVal;
4429     // If we don't have a value, we can't do a match,
4430     // but allow it at the lowest weight.
4431   if (!CallOperandVal)
4432     return CW_Default;
4433   // Look at the constraint type.
4434   switch (*constraint) {
4435     case 'i': // immediate integer.
4436     case 'n': // immediate integer with a known value.
4437       if (isa<ConstantInt>(CallOperandVal))
4438         weight = CW_Constant;
4439       break;
4440     case 's': // non-explicit intregal immediate.
4441       if (isa<GlobalValue>(CallOperandVal))
4442         weight = CW_Constant;
4443       break;
4444     case 'E': // immediate float if host format.
4445     case 'F': // immediate float.
4446       if (isa<ConstantFP>(CallOperandVal))
4447         weight = CW_Constant;
4448       break;
4449     case '<': // memory operand with autodecrement.
4450     case '>': // memory operand with autoincrement.
4451     case 'm': // memory operand.
4452     case 'o': // offsettable memory operand
4453     case 'V': // non-offsettable memory operand
4454       weight = CW_Memory;
4455       break;
4456     case 'r': // general register.
4457     case 'g': // general register, memory operand or immediate integer.
4458               // note: Clang converts "g" to "imr".
4459       if (CallOperandVal->getType()->isIntegerTy())
4460         weight = CW_Register;
4461       break;
4462     case 'X': // any operand.
4463   default:
4464     weight = CW_Default;
4465     break;
4466   }
4467   return weight;
4468 }
4469 
4470 /// If there are multiple different constraints that we could pick for this
4471 /// operand (e.g. "imr") try to pick the 'best' one.
4472 /// This is somewhat tricky: constraints fall into four classes:
4473 ///    Other         -> immediates and magic values
4474 ///    Register      -> one specific register
4475 ///    RegisterClass -> a group of regs
4476 ///    Memory        -> memory
4477 /// Ideally, we would pick the most specific constraint possible: if we have
4478 /// something that fits into a register, we would pick it.  The problem here
4479 /// is that if we have something that could either be in a register or in
4480 /// memory that use of the register could cause selection of *other*
4481 /// operands to fail: they might only succeed if we pick memory.  Because of
4482 /// this the heuristic we use is:
4483 ///
4484 ///  1) If there is an 'other' constraint, and if the operand is valid for
4485 ///     that constraint, use it.  This makes us take advantage of 'i'
4486 ///     constraints when available.
4487 ///  2) Otherwise, pick the most general constraint present.  This prefers
4488 ///     'm' over 'r', for example.
4489 ///
4490 static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
4491                              const TargetLowering &TLI,
4492                              SDValue Op, SelectionDAG *DAG) {
4493   assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
4494   unsigned BestIdx = 0;
4495   TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
4496   int BestGenerality = -1;
4497 
4498   // Loop over the options, keeping track of the most general one.
4499   for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
4500     TargetLowering::ConstraintType CType =
4501       TLI.getConstraintType(OpInfo.Codes[i]);
4502 
4503     // If this is an 'other' or 'immediate' constraint, see if the operand is
4504     // valid for it. For example, on X86 we might have an 'rI' constraint. If
4505     // the operand is an integer in the range [0..31] we want to use I (saving a
4506     // load of a register), otherwise we must use 'r'.
4507     if ((CType == TargetLowering::C_Other ||
4508          CType == TargetLowering::C_Immediate) && Op.getNode()) {
4509       assert(OpInfo.Codes[i].size() == 1 &&
4510              "Unhandled multi-letter 'other' constraint");
4511       std::vector<SDValue> ResultOps;
4512       TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
4513                                        ResultOps, *DAG);
4514       if (!ResultOps.empty()) {
4515         BestType = CType;
4516         BestIdx = i;
4517         break;
4518       }
4519     }
4520 
4521     // Things with matching constraints can only be registers, per gcc
4522     // documentation.  This mainly affects "g" constraints.
4523     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
4524       continue;
4525 
4526     // This constraint letter is more general than the previous one, use it.
4527     int Generality = getConstraintGenerality(CType);
4528     if (Generality > BestGenerality) {
4529       BestType = CType;
4530       BestIdx = i;
4531       BestGenerality = Generality;
4532     }
4533   }
4534 
4535   OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
4536   OpInfo.ConstraintType = BestType;
4537 }
4538 
4539 /// Determines the constraint code and constraint type to use for the specific
4540 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
4541 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
4542                                             SDValue Op,
4543                                             SelectionDAG *DAG) const {
4544   assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
4545 
4546   // Single-letter constraints ('r') are very common.
4547   if (OpInfo.Codes.size() == 1) {
4548     OpInfo.ConstraintCode = OpInfo.Codes[0];
4549     OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4550   } else {
4551     ChooseConstraint(OpInfo, *this, Op, DAG);
4552   }
4553 
4554   // 'X' matches anything.
4555   if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
4556     // Labels and constants are handled elsewhere ('X' is the only thing
4557     // that matches labels).  For Functions, the type here is the type of
4558     // the result, which is not what we want to look at; leave them alone.
4559     Value *v = OpInfo.CallOperandVal;
4560     if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
4561       OpInfo.CallOperandVal = v;
4562       return;
4563     }
4564 
4565     if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
4566       return;
4567 
4568     // Otherwise, try to resolve it to something we know about by looking at
4569     // the actual operand type.
4570     if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
4571       OpInfo.ConstraintCode = Repl;
4572       OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4573     }
4574   }
4575 }
4576 
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Because the division is 'exact' (no remainder), each divisor can be
/// decomposed as D = D0 * 2^Shift with D0 odd; the quotient is then
/// (N >>exact Shift) * inverse(D0) mod 2^BitWidth.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per divisor element: record the trailing-zero shift and the odd
  // factor's multiplicative inverse.  Returns false to abort the fold.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero: leave the node alone (UB, folded elsewhere).
    if (C->isNullValue())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      // Strip the power-of-two part; it is handled by an exact SRA below.
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Scalar divides use the single collected element directly.
  SDValue Shift, Factor;
  if (VT.isVector()) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else {
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  // Multiply by the inverse of the odd part to produce the quotient.
  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
4637 
4638 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
4639                               SelectionDAG &DAG,
4640                               SmallVectorImpl<SDNode *> &Created) const {
4641   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4642   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4643   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
4644     return SDValue(N, 0); // Lower SDIV as SDIV
4645   return SDValue();
4646 }
4647 
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  // Per-element expansion parameters, gathered by BuildSDIVPattern below:
  //   MagicFactors - multiplier for the MULHS
  //   Factors      - numerator adjustment multiplier (+1, -1 or 0)
  //   Shifts       - arithmetic post-shift amount
  //   ShiftMasks   - 0 for divisor +1/-1 (suppresses sign-bit add), else -1
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is UB; leave it for constant folding elsewhere.
    if (C->isNullValue())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    APInt::ms magics = Divisor.magic();
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.m = 0;
      magics.s = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Scalar divides use the single collected element of each list directly.
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (VT.isVector()) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else {
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  SDValue Q;
  if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
                          : isOperationLegalOrCustom(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
  else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
                               : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
    // Take the high half of the widening multiply's (lo, hi) result pair.
    SDValue LoHi =
        DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
    Q = SDValue(LoHi.getNode(), 1);
  } else
    return SDValue(); // No mulhs or equivalent.
  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
4756 
4757 /// Given an ISD::UDIV node expressing a divide by constant,
4758 /// return a DAG expression to select that will generate the same value by
4759 /// multiplying by a magic number.
4760 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
4761 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
4762                                   bool IsAfterLegalization,
4763                                   SmallVectorImpl<SDNode *> &Created) const {
4764   SDLoc dl(N);
4765   EVT VT = N->getValueType(0);
4766   EVT SVT = VT.getScalarType();
4767   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4768   EVT ShSVT = ShVT.getScalarType();
4769   unsigned EltBits = VT.getScalarSizeInBits();
4770 
4771   // Check to see if we can do this.
4772   // FIXME: We should be more aggressive here.
4773   if (!isTypeLegal(VT))
4774     return SDValue();
4775 
4776   bool UseNPQ = false;
4777   SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
4778 
4779   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
4780     if (C->isNullValue())
4781       return false;
4782     // FIXME: We should use a narrower constant when the upper
4783     // bits are known to be zero.
4784     APInt Divisor = C->getAPIntValue();
4785     APInt::mu magics = Divisor.magicu();
4786     unsigned PreShift = 0, PostShift = 0;
4787 
4788     // If the divisor is even, we can avoid using the expensive fixup by
4789     // shifting the divided value upfront.
4790     if (magics.a != 0 && !Divisor[0]) {
4791       PreShift = Divisor.countTrailingZeros();
4792       // Get magic number for the shifted divisor.
4793       magics = Divisor.lshr(PreShift).magicu(PreShift);
4794       assert(magics.a == 0 && "Should use cheap fixup now");
4795     }
4796 
4797     APInt Magic = magics.m;
4798 
4799     unsigned SelNPQ;
4800     if (magics.a == 0 || Divisor.isOneValue()) {
4801       assert(magics.s < Divisor.getBitWidth() &&
4802              "We shouldn't generate an undefined shift!");
4803       PostShift = magics.s;
4804       SelNPQ = false;
4805     } else {
4806       PostShift = magics.s - 1;
4807       SelNPQ = true;
4808     }
4809 
4810     PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
4811     MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
4812     NPQFactors.push_back(
4813         DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
4814                                : APInt::getNullValue(EltBits),
4815                         dl, SVT));
4816     PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
4817     UseNPQ |= SelNPQ;
4818     return true;
4819   };
4820 
4821   SDValue N0 = N->getOperand(0);
4822   SDValue N1 = N->getOperand(1);
4823 
4824   // Collect the shifts/magic values from each element.
4825   if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
4826     return SDValue();
4827 
4828   SDValue PreShift, PostShift, MagicFactor, NPQFactor;
4829   if (VT.isVector()) {
4830     PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
4831     MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
4832     NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
4833     PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
4834   } else {
4835     PreShift = PreShifts[0];
4836     MagicFactor = MagicFactors[0];
4837     PostShift = PostShifts[0];
4838   }
4839 
4840   SDValue Q = N0;
4841   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
4842   Created.push_back(Q.getNode());
4843 
4844   // FIXME: We should support doing a MUL in a wider type.
4845   auto GetMULHU = [&](SDValue X, SDValue Y) {
4846     if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
4847                             : isOperationLegalOrCustom(ISD::MULHU, VT))
4848       return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
4849     if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
4850                             : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
4851       SDValue LoHi =
4852           DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
4853       return SDValue(LoHi.getNode(), 1);
4854     }
4855     return SDValue(); // No mulhu or equivalent
4856   };
4857 
4858   // Multiply the numerator (operand 0) by the magic value.
4859   Q = GetMULHU(Q, MagicFactor);
4860   if (!Q)
4861     return SDValue();
4862 
4863   Created.push_back(Q.getNode());
4864 
4865   if (UseNPQ) {
4866     SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
4867     Created.push_back(NPQ.getNode());
4868 
4869     // For vectors we might have a mix of non-NPQ/NPQ paths, so use
4870     // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
4871     if (VT.isVector())
4872       NPQ = GetMULHU(NPQ, NPQFactor);
4873     else
4874       NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
4875 
4876     Created.push_back(NPQ.getNode());
4877 
4878     Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
4879     Created.push_back(Q.getNode());
4880   }
4881 
4882   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
4883   Created.push_back(Q.getNode());
4884 
4885   SDValue One = DAG.getConstant(1, dl, VT);
4886   SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
4887   return DAG.getSelect(dl, VT, IsOne, N0, Q);
4888 }
4889 
4890 /// If all values in Values that *don't* match the predicate are same 'splat'
4891 /// value, then replace all values with that splat value.
4892 /// Else, if AlternativeReplacement was provided, then replace all values that
4893 /// do match predicate with AlternativeReplacement value.
4894 static void
4895 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
4896                           std::function<bool(SDValue)> Predicate,
4897                           SDValue AlternativeReplacement = SDValue()) {
4898   SDValue Replacement;
4899   // Is there a value for which the Predicate does *NOT* match? What is it?
4900   auto SplatValue = llvm::find_if_not(Values, Predicate);
4901   if (SplatValue != Values.end()) {
4902     // Does Values consist only of SplatValue's and values matching Predicate?
4903     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
4904           return Value == *SplatValue || Predicate(Value);
4905         })) // Then we shall replace values matching predicate with SplatValue.
4906       Replacement = *SplatValue;
4907   }
4908   if (!Replacement) {
4909     // Oops, we did not find the "baseline" splat value.
4910     if (!AlternativeReplacement)
4911       return; // Nothing to do.
4912     // Let's replace with provided value then.
4913     Replacement = AlternativeReplacement;
4914   }
4915   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
4916 }
4917 
4918 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
4919 /// where the divisor is constant and the comparison target is zero,
4920 /// return a DAG expression that will generate the same comparison result
4921 /// using only multiplications, additions and shifts/rotations.
4922 /// Ref: "Hacker's Delight" 10-17.
4923 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
4924                                         SDValue CompTargetNode,
4925                                         ISD::CondCode Cond,
4926                                         DAGCombinerInfo &DCI,
4927                                         const SDLoc &DL) const {
4928   SmallVector<SDNode *, 5> Built;
4929   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
4930                                          DCI, DL, Built)) {
4931     for (SDNode *N : Built)
4932       DCI.AddToWorklist(N);
4933     return Folded;
4934   }
4935 
4936   return SDValue();
4937 }
4938 
4939 SDValue
4940 TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
4941                                   SDValue CompTargetNode, ISD::CondCode Cond,
4942                                   DAGCombinerInfo &DCI, const SDLoc &DL,
4943                                   SmallVectorImpl<SDNode *> &Created) const {
4944   // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
4945   // - D must be constant, with D = D0 * 2^K where D0 is odd
4946   // - P is the multiplicative inverse of D0 modulo 2^W
4947   // - Q = floor(((2^W) - 1) / D)
4948   // where W is the width of the common type of N and D.
4949   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4950          "Only applicable for (in)equality comparisons.");
4951 
4952   SelectionDAG &DAG = DCI.DAG;
4953 
4954   EVT VT = REMNode.getValueType();
4955   EVT SVT = VT.getScalarType();
4956   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4957   EVT ShSVT = ShVT.getScalarType();
4958 
4959   // If MUL is unavailable, we cannot proceed in any case.
4960   if (!isOperationLegalOrCustom(ISD::MUL, VT))
4961     return SDValue();
4962 
4963   bool ComparingWithAllZeros = true;
4964   bool AllComparisonsWithNonZerosAreTautological = true;
4965   bool HadTautologicalLanes = false;
4966   bool AllLanesAreTautological = true;
4967   bool HadEvenDivisor = false;
4968   bool AllDivisorsArePowerOfTwo = true;
4969   bool HadTautologicalInvertedLanes = false;
4970   SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
4971 
4972   auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
4973     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
4974     if (CDiv->isNullValue())
4975       return false;
4976 
4977     const APInt &D = CDiv->getAPIntValue();
4978     const APInt &Cmp = CCmp->getAPIntValue();
4979 
4980     ComparingWithAllZeros &= Cmp.isNullValue();
4981 
    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
4986     bool TautologicalInvertedLane = D.ule(Cmp);
4987     HadTautologicalInvertedLanes |= TautologicalInvertedLane;
4988 
4989     // If all lanes are tautological (either all divisors are ones, or divisor
4990     // is not greater than the constant we are comparing with),
4991     // we will prefer to avoid the fold.
4992     bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
4993     HadTautologicalLanes |= TautologicalLane;
4994     AllLanesAreTautological &= TautologicalLane;
4995 
    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
4999     if (!Cmp.isNullValue())
5000       AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
5001 
5002     // Decompose D into D0 * 2^K
5003     unsigned K = D.countTrailingZeros();
5004     assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
5005     APInt D0 = D.lshr(K);
5006 
5007     // D is even if it has trailing zeros.
5008     HadEvenDivisor |= (K != 0);
5009     // D is a power-of-two if D0 is one.
5010     // If all divisors are power-of-two, we will prefer to avoid the fold.
5011     AllDivisorsArePowerOfTwo &= D0.isOneValue();
5012 
5013     // P = inv(D0, 2^W)
5014     // 2^W requires W + 1 bits, so we have to extend and then truncate.
5015     unsigned W = D.getBitWidth();
5016     APInt P = D0.zext(W + 1)
5017                   .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
5018                   .trunc(W);
5019     assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
5020     assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
5021 
5022     // Q = floor((2^W - 1) u/ D)
5023     // R = ((2^W - 1) u% D)
5024     APInt Q, R;
5025     APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
5026 
5027     // If we are comparing with zero, then that comparison constant is okay,
5028     // else it may need to be one less than that.
5029     if (Cmp.ugt(R))
5030       Q -= 1;
5031 
5032     assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
5033            "We are expecting that K is always less than all-ones for ShSVT");
5034 
5035     // If the lane is tautological the result can be constant-folded.
5036     if (TautologicalLane) {
5037       // Set P and K amount to a bogus values so we can try to splat them.
5038       P = 0;
5039       K = -1;
5040       // And ensure that comparison constant is tautological,
5041       // it will always compare true/false.
5042       Q = -1;
5043     }
5044 
5045     PAmts.push_back(DAG.getConstant(P, DL, SVT));
5046     KAmts.push_back(
5047         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
5048     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
5049     return true;
5050   };
5051 
5052   SDValue N = REMNode.getOperand(0);
5053   SDValue D = REMNode.getOperand(1);
5054 
5055   // Collect the values from each element.
5056   if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
5057     return SDValue();
5058 
5059   // If all lanes are tautological, the result can be constant-folded.
5060   if (AllLanesAreTautological)
5061     return SDValue();
5062 
5063   // If this is a urem by a powers-of-two, avoid the fold since it can be
5064   // best implemented as a bit test.
5065   if (AllDivisorsArePowerOfTwo)
5066     return SDValue();
5067 
5068   SDValue PVal, KVal, QVal;
5069   if (VT.isVector()) {
5070     if (HadTautologicalLanes) {
5071       // Try to turn PAmts into a splat, since we don't care about the values
5072       // that are currently '0'. If we can't, just keep '0'`s.
5073       turnVectorIntoSplatVector(PAmts, isNullConstant);
5074       // Try to turn KAmts into a splat, since we don't care about the values
5075       // that are currently '-1'. If we can't, change them to '0'`s.
5076       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
5077                                 DAG.getConstant(0, DL, ShSVT));
5078     }
5079 
5080     PVal = DAG.getBuildVector(VT, DL, PAmts);
5081     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
5082     QVal = DAG.getBuildVector(VT, DL, QAmts);
5083   } else {
5084     PVal = PAmts[0];
5085     KVal = KAmts[0];
5086     QVal = QAmts[0];
5087   }
5088 
5089   if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
5090     if (!isOperationLegalOrCustom(ISD::SUB, VT))
5091       return SDValue(); // FIXME: Could/should use `ISD::ADD`?
5092     assert(CompTargetNode.getValueType() == N.getValueType() &&
5093            "Expecting that the types on LHS and RHS of comparisons match.");
5094     N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
5095   }
5096 
5097   // (mul N, P)
5098   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
5099   Created.push_back(Op0.getNode());
5100 
5101   // Rotate right only if any divisor was even. We avoid rotates for all-odd
5102   // divisors as a performance improvement, since rotating by 0 is a no-op.
5103   if (HadEvenDivisor) {
5104     // We need ROTR to do this.
5105     if (!isOperationLegalOrCustom(ISD::ROTR, VT))
5106       return SDValue();
5107     SDNodeFlags Flags;
5108     Flags.setExact(true);
5109     // UREM: (rotr (mul N, P), K)
5110     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
5111     Created.push_back(Op0.getNode());
5112   }
5113 
5114   // UREM: (setule/setugt (rotr (mul N, P), K), Q)
5115   SDValue NewCC =
5116       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
5117                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
5118   if (!HadTautologicalInvertedLanes)
5119     return NewCC;
5120 
5121   // If any lanes previously compared always-false, the NewCC will give
5122   // always-true result for them, so we need to fixup those lanes.
5123   // Or the other way around for inequality predicate.
5124   assert(VT.isVector() && "Can/should only get here for vectors.");
5125   Created.push_back(NewCC.getNode());
5126 
5127   // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
5128   // if C2 is not less than C1, the comparison is always false.
5129   // But we have produced the comparison that will give the
5130   // opposive tautological answer. So these lanes would need to be fixed up.
5131   SDValue TautologicalInvertedChannels =
5132       DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
5133   Created.push_back(TautologicalInvertedChannels.getNode());
5134 
5135   if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
5136     // If we have a vector select, let's replace the comparison results in the
5137     // affected lanes with the correct tautological result.
5138     SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
5139                                               DL, SETCCVT, SETCCVT);
5140     return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
5141                        Replacement, NewCC);
5142   }
5143 
5144   // Else, we can just invert the comparison result in the appropriate lanes.
5145   if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
5146     return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
5147                        TautologicalInvertedChannels);
5148 
5149   return SDValue(); // Don't know how to lower.
5150 }
5151 
5152 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
5153 /// where the divisor is constant and the comparison target is zero,
5154 /// return a DAG expression that will generate the same comparison result
5155 /// using only multiplications, additions and shifts/rotations.
5156 /// Ref: "Hacker's Delight" 10-17.
5157 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
5158                                         SDValue CompTargetNode,
5159                                         ISD::CondCode Cond,
5160                                         DAGCombinerInfo &DCI,
5161                                         const SDLoc &DL) const {
5162   SmallVector<SDNode *, 7> Built;
5163   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5164                                          DCI, DL, Built)) {
5165     assert(Built.size() <= 7 && "Max size prediction failed.");
5166     for (SDNode *N : Built)
5167       DCI.AddToWorklist(N);
5168     return Folded;
5169   }
5170 
5171   return SDValue();
5172 }
5173 
/// Implementation of the srem-by-constant (in)equality fold; see the
/// documentation on buildSREMEqFold above. Every node created along the way
/// is appended to \p Created so the caller can queue it for re-combining.
/// Returns an empty SDValue when the fold is impossible or unprofitable.
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isNullValue())
    return SDValue();

  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  // Per-lane constants of the transformed pattern: multiplier (P),
  // additive offset (A), rotate amount (K) and comparison bound (Q).
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  // Computes the fold constants for a single divisor lane. Returning false
  // aborts the whole transform.
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isNullValue())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`

    // INT_MIN cannot be negated above; such lanes get special fixup later.
    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOneValue();
    AllDivisorsAreOnes &= D.isOneValue();

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOneValue();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOneValue()) {
      // Set P, A and K to a bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, AVal, KVal, QVal;
  if (VT.isVector()) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else {
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();

    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    SDNodeFlags Flags;
    Flags.setExact(true);
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // The fixup builds extra comparisons, a mask and a blend; bail out if any
  // of those operations is unavailable on this target.
  if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isOperationLegalOrCustom(Cond, VT) ||
      !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended =
      DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);

  return Blended;
}
5414 
5415 bool TargetLowering::
5416 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
5417   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
5418     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
5419                                 "be a constant integer");
5420     return true;
5421   }
5422 
5423   return false;
5424 }
5425 
/// Score how cheaply \p Op can be negated: 2 means the negation is free
/// (Op is itself an FNEG that can just be stripped), 1 means it is cheaply
/// negatible by rewriting the expression, 0 means it is not. Must stay in
/// sync with getNegatedExpression below, which performs the rewrite.
char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
                                        bool LegalOperations, bool ForCodeSize,
                                        unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG)
    return 2;

  // Don't allow anything with multiple uses unless we know it is free.
  EVT VT = Op.getValueType();
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
                           isFPExtFree(VT, Op.getOperand(0).getValueType())))
    return 0;

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return 0;

  switch (Op.getOpcode()) {
  case ISD::ConstantFP: {
    if (!LegalOperations)
      return 1;

    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    return isOperationLegal(ISD::ConstantFP, VT) ||
           isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                        ForCodeSize);
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      return 0;
    if (!LegalOperations)
      return 1;
    if (isOperationLegal(ISD::ConstantFP, VT) &&
        isOperationLegal(ISD::BUILD_VECTOR, VT))
      return 1;
    // Otherwise every (non-undef) negated lane constant must itself be a
    // legal FP immediate.
    return llvm::all_of(Op->op_values(), [&](SDValue N) {
      return N.isUndef() ||
             isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                          ForCodeSize);
    });
  }
  case ISD::FADD:
    // -(A + B) is only (-A) - B / (-B) - A when signed zeros don't matter.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT))
      return 0;

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                                    ForCodeSize, Depth + 1))
      return V;
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
                              ForCodeSize, Depth + 1);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                                    ForCodeSize, Depth + 1))
      return V;

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        return 0;

    return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
                              ForCodeSize, Depth + 1);

  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    // Z must be negatible in either case.
    char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    if (!V2)
      return 0;

    // One of Op0/Op1 must be cheaply negatible, then select the cheapest.
    char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    char V01 = std::max(V0, V1);
    return V01 ? std::max(V01, V2) : 0;
  }

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    // These commute with fneg (see getNegatedExpression), so negating them
    // costs exactly as much as negating their operand.
    return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                              ForCodeSize, Depth + 1);
  }

  return 0;
}
5541 
/// Build the negated form of \p Op. Callers must first have established via
/// isNegatibleForFree (same arguments) that the negation is possible; the
/// Depth assert below guards that contract. The case structure mirrors
/// isNegatibleForFree exactly.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOperations,
                                             bool ForCodeSize,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG)
    return Op.getOperand(0);

  assert(Depth <= SelectionDAG::MaxRecursionDepth &&
         "getNegatedExpression doesn't match isNegatibleForFree");
  const SDNodeFlags Flags = Op->getFlags();

  switch (Op.getOpcode()) {
  case ISD::ConstantFP: {
    // Negate the constant directly.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::BUILD_VECTOR: {
    // Negate each constant lane; undef lanes pass through unchanged.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
    }
    return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
  }
  case ISD::FADD:
    assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
            Flags.hasNoSignedZeros()) &&
           "Expected NSZ fp-flag");

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
                           Depth + 1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         getNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth + 1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       getNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP =
            isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
                           Depth + 1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         getNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth + 1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(
        Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0),
        getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
                             ForCodeSize, Depth + 1),
        Flags);

  case ISD::FMA:
  case ISD::FMAD: {
    assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
            Flags.hasNoSignedZeros()) &&
           "Expected NSZ fp-flag");

    // The addend (operand 2) is negated in both variants below.
    SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
                                        ForCodeSize, Depth + 1);

    char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
                                 ForCodeSize, Depth + 1);
    // TODO: This is a hack. It is possible that costs have changed between now
    //       and the initial calls to isNegatibleForFree(). That is because we
    //       are rewriting the expression, and that may change the number of
    //       uses (and therefore the cost) of values. If the negation costs are
    //       equal, only negate this value if it is a constant. Otherwise, try
    //       operand 1. A better fix would eliminate uses as a cost factor or
    //       track the change in uses as we rewrite the expression.
    if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) {
      // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
      SDValue Neg0 = getNegatedExpression(
          Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
                         Op.getOperand(1), Neg2, Flags);
    }

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
                                        ForCodeSize, Depth + 1);
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0), Neg1, Neg2, Flags);
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // fneg commutes with these unary ops: negate the operand instead.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       getNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1));
  case ISD::FP_ROUND:
    // Same, but FP_ROUND carries a truncation-kind operand to preserve.
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       getNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1),
                       Op.getOperand(1));
  }

  llvm_unreachable("Unknown code");
}
5672 
5673 //===----------------------------------------------------------------------===//
5674 // Legalization Utilities
5675 //===----------------------------------------------------------------------===//
5676 
5677 bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
5678                                     SDValue LHS, SDValue RHS,
5679                                     SmallVectorImpl<SDValue> &Result,
5680                                     EVT HiLoVT, SelectionDAG &DAG,
5681                                     MulExpansionKind Kind, SDValue LL,
5682                                     SDValue LH, SDValue RL, SDValue RH) const {
5683   assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
5684          Opcode == ISD::SMUL_LOHI);
5685 
5686   bool HasMULHS = (Kind == MulExpansionKind::Always) ||
5687                   isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
5688   bool HasMULHU = (Kind == MulExpansionKind::Always) ||
5689                   isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
5690   bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
5691                       isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
5692   bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
5693                       isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
5694 
5695   if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
5696     return false;
5697 
5698   unsigned OuterBitSize = VT.getScalarSizeInBits();
5699   unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
5700   unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
5701   unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
5702 
5703   // LL, LH, RL, and RH must be either all NULL or all set to a value.
5704   assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
5705          (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
5706 
5707   SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
5708   auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
5709                           bool Signed) -> bool {
5710     if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
5711       Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
5712       Hi = SDValue(Lo.getNode(), 1);
5713       return true;
5714     }
5715     if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
5716       Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
5717       Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
5718       return true;
5719     }
5720     return false;
5721   };
5722 
5723   SDValue Lo, Hi;
5724 
5725   if (!LL.getNode() && !RL.getNode() &&
5726       isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
5727     LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
5728     RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
5729   }
5730 
5731   if (!LL.getNode())
5732     return false;
5733 
5734   APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
5735   if (DAG.MaskedValueIsZero(LHS, HighMask) &&
5736       DAG.MaskedValueIsZero(RHS, HighMask)) {
5737     // The inputs are both zero-extended.
5738     if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
5739       Result.push_back(Lo);
5740       Result.push_back(Hi);
5741       if (Opcode != ISD::MUL) {
5742         SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
5743         Result.push_back(Zero);
5744         Result.push_back(Zero);
5745       }
5746       return true;
5747     }
5748   }
5749 
5750   if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
5751       RHSSB > InnerBitSize) {
5752     // The input values are both sign-extended.
5753     // TODO non-MUL case?
5754     if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
5755       Result.push_back(Lo);
5756       Result.push_back(Hi);
5757       return true;
5758     }
5759   }
5760 
5761   unsigned ShiftAmount = OuterBitSize - InnerBitSize;
5762   EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
5763   if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
5764     // FIXME getShiftAmountTy does not always return a sensible result when VT
5765     // is an illegal type, and so the type may be too small to fit the shift
5766     // amount. Override it with i32. The shift will have to be legalized.
5767     ShiftAmountTy = MVT::i32;
5768   }
5769   SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
5770 
5771   if (!LH.getNode() && !RH.getNode() &&
5772       isOperationLegalOrCustom(ISD::SRL, VT) &&
5773       isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
5774     LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
5775     LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
5776     RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
5777     RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
5778   }
5779 
5780   if (!LH.getNode())
5781     return false;
5782 
5783   if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
5784     return false;
5785 
5786   Result.push_back(Lo);
5787 
5788   if (Opcode == ISD::MUL) {
5789     RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
5790     LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
5791     Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
5792     Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
5793     Result.push_back(Hi);
5794     return true;
5795   }
5796 
5797   // Compute the full width result.
5798   auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
5799     Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
5800     Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
5801     Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
5802     return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
5803   };
5804 
5805   SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
5806   if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
5807     return false;
5808 
5809   // This is effectively the add part of a multiply-add of half-sized operands,
5810   // so it cannot overflow.
5811   Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
5812 
5813   if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
5814     return false;
5815 
5816   SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
5817   EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5818 
5819   bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
5820                   isOperationLegalOrCustom(ISD::ADDE, VT));
5821   if (UseGlue)
5822     Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
5823                        Merge(Lo, Hi));
5824   else
5825     Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
5826                        Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
5827 
5828   SDValue Carry = Next.getValue(1);
5829   Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5830   Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
5831 
5832   if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
5833     return false;
5834 
5835   if (UseGlue)
5836     Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
5837                      Carry);
5838   else
5839     Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
5840                      Zero, Carry);
5841 
5842   Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
5843 
5844   if (Opcode == ISD::SMUL_LOHI) {
5845     SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
5846                                   DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
5847     Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
5848 
5849     NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
5850                           DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
5851     Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
5852   }
5853 
5854   Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5855   Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
5856   Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5857   return true;
5858 }
5859 
5860 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
5861                                SelectionDAG &DAG, MulExpansionKind Kind,
5862                                SDValue LL, SDValue LH, SDValue RL,
5863                                SDValue RH) const {
5864   SmallVector<SDValue, 2> Result;
5865   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
5866                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
5867                            DAG, Kind, LL, LH, RL, RH);
5868   if (Ok) {
5869     assert(Result.size() == 2);
5870     Lo = Result[0];
5871     Hi = Result[1];
5872   }
5873   return Ok;
5874 }
5875 
5876 bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
5877                                        SelectionDAG &DAG) const {
5878   EVT VT = Node->getValueType(0);
5879 
5880   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
5881                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
5882                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
5883                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
5884     return false;
5885 
5886   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
5887   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
5888   SDValue X = Node->getOperand(0);
5889   SDValue Y = Node->getOperand(1);
5890   SDValue Z = Node->getOperand(2);
5891 
5892   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5893   bool IsFSHL = Node->getOpcode() == ISD::FSHL;
5894   SDLoc DL(SDValue(Node, 0));
5895 
5896   EVT ShVT = Z.getValueType();
5897   SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
5898   SDValue Zero = DAG.getConstant(0, DL, ShVT);
5899 
5900   SDValue ShAmt;
5901   if (isPowerOf2_32(EltSizeInBits)) {
5902     SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
5903     ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
5904   } else {
5905     ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
5906   }
5907 
5908   SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
5909   SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
5910   SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
5911   SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
5912 
5913   // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
5914   // and that is undefined. We must compare and select to avoid UB.
5915   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
5916 
5917   // For fshl, 0-shift returns the 1st arg (X).
5918   // For fshr, 0-shift returns the 2nd arg (Y).
5919   SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
5920   Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
5921   return true;
5922 }
5923 
5924 // TODO: Merge with expandFunnelShift.
5925 bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
5926                                SelectionDAG &DAG) const {
5927   EVT VT = Node->getValueType(0);
5928   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5929   bool IsLeft = Node->getOpcode() == ISD::ROTL;
5930   SDValue Op0 = Node->getOperand(0);
5931   SDValue Op1 = Node->getOperand(1);
5932   SDLoc DL(SDValue(Node, 0));
5933 
5934   EVT ShVT = Op1.getValueType();
5935   SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
5936 
5937   // If a rotate in the other direction is legal, use it.
5938   unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
5939   if (isOperationLegal(RevRot, VT)) {
5940     SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
5941     Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
5942     return true;
5943   }
5944 
5945   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
5946                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
5947                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
5948                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
5949                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
5950     return false;
5951 
5952   // Otherwise,
5953   //   (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
5954   //   (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
5955   //
5956   assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
5957          "Expecting the type bitwidth to be a power of 2");
5958   unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
5959   unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
5960   SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
5961   SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
5962   SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
5963   SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
5964   Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
5965                        DAG.getNode(HsOpc, DL, VT, Op0, And1));
5966   return true;
5967 }
5968 
/// Expand FP_TO_SINT via integer bit manipulation when the target has no
/// native conversion. Currently only handles f32 -> i64 and non-strict
/// nodes. On success, the expanded value is placed in \p Result and true is
/// returned; otherwise false is returned and nothing is emitted.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  // Strict FP nodes carry their chain in operand 0; the FP value follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE single-precision layout: sign in the top bit, 8 exponent bits
  // (mask 0x7F800000, bias 127) above 23 mantissa bits (mask 0x007FFFFF).
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as its raw integer bit pattern.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Arithmetic-shift the sign bit across the whole word: Sign becomes
  // all-ones (-1) for negative inputs and 0 otherwise.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Significand = mantissa bits with the implicit leading one (bit 23,
  // 0x00800000) reattached.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand by the exponent: shift left when the exponent
  // exceeds the mantissa width, otherwise shift right to drop the
  // fractional bits (truncation toward zero).
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is -1 and is a
  // no-op when Sign is 0.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |Src| < 1.0, which truncates to 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
6039 
/// Expand [STRICT_]FP_TO_UINT in terms of [STRICT_]FP_TO_SINT. On success,
/// the expanded value is placed in \p Result (and, for strict nodes, the new
/// chain in \p Chain) and true is returned.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // Strict FP nodes carry their chain in operand 0; the FP value follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Cst is the FP value of the destination signmask (2^(DstBits-1)); Sel is
  // true when Src is below it, i.e. a plain signed conversion suffices.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      // Thread the chain through the FSUB and the conversion.
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Node->getOperand(0), Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    // The signmask is added back with XOR, which sets the top bit of the
    // offset conversion's result.
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
6124 
/// Expand [STRICT_]UINT_TO_FP for i64 sources into signed conversions plus
/// fixups. Only i64 -> f32 and i64 -> f64 (by scalar type) are handled. On
/// success, the expanded value is placed in \p Result (and, for strict
/// nodes, the new chain in \p Chain) and true is returned.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  // Strict FP nodes carry their chain in operand 0; the value follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  if (SrcVT.getScalarType() != MVT::i64)
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  if (DstVT.getScalarType() == MVT::f32) {
    // Only expand vector types if we have the appropriate vector bit
    // operations.
    if (SrcVT.isVector() &&
        (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
         !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
         !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
      return false;

    // For unsigned conversions, convert them to signed conversions using the
    // algorithm from the x86_64 __floatundidf in compiler_rt.
    // Fast path: when the top bit of Src is clear, a signed conversion is
    // already correct.
    SDValue Fast;
    if (Node->isStrictFPOpcode()) {
      Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
                         {Node->getOperand(0), Src});
      Chain = SDValue(Fast.getNode(), 1);
    } else
      Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);

    // Slow path: halve the value while keeping the dropped bit as a sticky
    // bit ((Src >> 1) | (Src & 1)), convert signed, then double the result
    // with an FADD.
    SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
    SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
    SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
    SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
    SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);

    SDValue Slow;
    if (Node->isStrictFPOpcode()) {
      // In strict mode, make sure the conversion and the doubling FADD stay
      // chained after the fast-path conversion.
      SDValue SignCvt = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl,
                                    {DstVT, MVT::Other}, {Chain, Or});
      Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
                         { SignCvt.getValue(1), SignCvt, SignCvt });
      Chain = Slow.getValue(1);
    } else {
      SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
      Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
    }

    // TODO: This really should be implemented using a branch rather than a
    // select.  We happen to get lucky and machinesink does the right
    // thing most of the time.  This would be a good candidate for a
    // pseudo-op, or, even better, for whole-function isel.
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

    // Select the slow path when the source is negative as a signed value,
    // i.e. when the unsigned value's top bit is set.
    SDValue SignBitTest = DAG.getSetCC(
        dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
    Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
    return true;
  }

  if (DstVT.getScalarType() == MVT::f64) {
    // Only expand vector types if we have the appropriate vector bit
    // operations.
    if (SrcVT.isVector() &&
        (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
         !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
         !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
      return false;

    // Implementation of unsigned i64 to f64 following the algorithm in
    // __floatundidf in compiler_rt. This implementation has the advantage
    // of performing rounding correctly, both in the default rounding mode
    // and in all alternate rounding modes.
    // The integer constants below are double bit patterns: 0x433... is the
    // exponent field for 2^52 and 0x453... for 2^84; OR-ing a 32-bit half
    // underneath such an exponent forms an exact double (2^k + half).
    SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
    SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
        BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
    SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
    SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
    SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

    // Split the input into 32-bit halves and materialize each as an exact
    // double biased by 2^52 (low half) / 2^84 (high half) via OR + bitcast.
    SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
    SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
    SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
    SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
    SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
    SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
    // Result = (HiFlt - (2^84 + 2^52)) + LoFlt: the FSUB cancels both
    // biases exactly, and the final FADD performs the single rounding step.
    if (Node->isStrictFPOpcode()) {
      SDValue HiSub =
          DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
                      {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
      Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
                           {HiSub.getValue(1), LoFlt, HiSub});
      Chain = Result.getValue(1);
    } else {
      SDValue HiSub =
          DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
      Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
    }
    return true;
  }

  // Other destination types are not handled by this expansion.
  return false;
}
6236 
6237 SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
6238                                               SelectionDAG &DAG) const {
6239   SDLoc dl(Node);
6240   unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
6241     ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
6242   EVT VT = Node->getValueType(0);
6243   if (isOperationLegalOrCustom(NewOp, VT)) {
6244     SDValue Quiet0 = Node->getOperand(0);
6245     SDValue Quiet1 = Node->getOperand(1);
6246 
6247     if (!Node->getFlags().hasNoNaNs()) {
6248       // Insert canonicalizes if it's possible we need to quiet to get correct
6249       // sNaN behavior.
6250       if (!DAG.isKnownNeverSNaN(Quiet0)) {
6251         Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
6252                              Node->getFlags());
6253       }
6254       if (!DAG.isKnownNeverSNaN(Quiet1)) {
6255         Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
6256                              Node->getFlags());
6257       }
6258     }
6259 
6260     return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
6261   }
6262 
6263   // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
6264   // instead if there are no NaNs.
6265   if (Node->getFlags().hasNoNaNs()) {
6266     unsigned IEEE2018Op =
6267         Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
6268     if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
6269       return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
6270                          Node->getOperand(1), Node->getFlags());
6271     }
6272   }
6273 
6274   // If none of the above worked, but there are no NaNs, then expand to
6275   // a compare/select sequence.  This is required for correctness since
6276   // InstCombine might have canonicalized a fcmp+select sequence to a
6277   // FMINNUM/FMAXNUM node.  If we were to fall through to the default
6278   // expansion to libcall, we might introduce a link-time dependency
6279   // on libm into a file that originally did not have one.
6280   if (Node->getFlags().hasNoNaNs()) {
6281     ISD::CondCode Pred =
6282         Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
6283     SDValue Op1 = Node->getOperand(0);
6284     SDValue Op2 = Node->getOperand(1);
6285     SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
6286     // Copy FMF flags, but always set the no-signed-zeros flag
6287     // as this is implied by the FMINNUM/FMAXNUM semantics.
6288     SDNodeFlags Flags = Node->getFlags();
6289     Flags.setNoSignedZeros(true);
6290     SelCC->setFlags(Flags);
6291     return SelCC;
6292   }
6293 
6294   return SDValue();
6295 }
6296 
6297 bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
6298                                  SelectionDAG &DAG) const {
6299   SDLoc dl(Node);
6300   EVT VT = Node->getValueType(0);
6301   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6302   SDValue Op = Node->getOperand(0);
6303   unsigned Len = VT.getScalarSizeInBits();
6304   assert(VT.isInteger() && "CTPOP not implemented for this type.");
6305 
6306   // TODO: Add support for irregular type lengths.
6307   if (!(Len <= 128 && Len % 8 == 0))
6308     return false;
6309 
6310   // Only expand vector types if we have the appropriate vector bit operations.
6311   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
6312                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
6313                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
6314                         (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
6315                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
6316     return false;
6317 
6318   // This is the "best" algorithm from
6319   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
6320   SDValue Mask55 =
6321       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
6322   SDValue Mask33 =
6323       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
6324   SDValue Mask0F =
6325       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
6326   SDValue Mask01 =
6327       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
6328 
6329   // v = v - ((v >> 1) & 0x55555555...)
6330   Op = DAG.getNode(ISD::SUB, dl, VT, Op,
6331                    DAG.getNode(ISD::AND, dl, VT,
6332                                DAG.getNode(ISD::SRL, dl, VT, Op,
6333                                            DAG.getConstant(1, dl, ShVT)),
6334                                Mask55));
6335   // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
6336   Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
6337                    DAG.getNode(ISD::AND, dl, VT,
6338                                DAG.getNode(ISD::SRL, dl, VT, Op,
6339                                            DAG.getConstant(2, dl, ShVT)),
6340                                Mask33));
6341   // v = (v + (v >> 4)) & 0x0F0F0F0F...
6342   Op = DAG.getNode(ISD::AND, dl, VT,
6343                    DAG.getNode(ISD::ADD, dl, VT, Op,
6344                                DAG.getNode(ISD::SRL, dl, VT, Op,
6345                                            DAG.getConstant(4, dl, ShVT))),
6346                    Mask0F);
6347   // v = (v * 0x01010101...) >> (Len - 8)
6348   if (Len > 8)
6349     Op =
6350         DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
6351                     DAG.getConstant(Len - 8, dl, ShVT));
6352 
6353   Result = Op;
6354   return true;
6355 }
6356 
6357 bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
6358                                 SelectionDAG &DAG) const {
6359   SDLoc dl(Node);
6360   EVT VT = Node->getValueType(0);
6361   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6362   SDValue Op = Node->getOperand(0);
6363   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6364 
6365   // If the non-ZERO_UNDEF version is supported we can use that instead.
6366   if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
6367       isOperationLegalOrCustom(ISD::CTLZ, VT)) {
6368     Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
6369     return true;
6370   }
6371 
6372   // If the ZERO_UNDEF version is supported use that and handle the zero case.
6373   if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
6374     EVT SetCCVT =
6375         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6376     SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
6377     SDValue Zero = DAG.getConstant(0, dl, VT);
6378     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6379     Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6380                          DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
6381     return true;
6382   }
6383 
6384   // Only expand vector types if we have the appropriate vector bit operations.
6385   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6386                         !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
6387                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
6388                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
6389     return false;
6390 
6391   // for now, we do this:
6392   // x = x | (x >> 1);
6393   // x = x | (x >> 2);
6394   // ...
6395   // x = x | (x >>16);
6396   // x = x | (x >>32); // for 64-bit input
6397   // return popcount(~x);
6398   //
6399   // Ref: "Hacker's Delight" by Henry Warren
6400   for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
6401     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
6402     Op = DAG.getNode(ISD::OR, dl, VT, Op,
6403                      DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
6404   }
6405   Op = DAG.getNOT(dl, Op, VT);
6406   Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
6407   return true;
6408 }
6409 
/// Expand CTTZ/CTTZ_ZERO_UNDEF when the target has no native support. On
/// success, the expanded value is placed in \p Result and true is returned;
/// otherwise false is returned and nothing is emitted.
bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
                                SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT)) {
    Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
    return true;
  }

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // A zero input is undefined for the ZERO_UNDEF form, so explicitly
    // select the bitwidth for that case.
    Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
    return true;
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // Either CTPOP or CTLZ suffices for the popcount-based expansions below.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return false;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    Result =
        DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                    DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
    return true;
  }

  Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
  return true;
}
6464 
6465 bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
6466                                SelectionDAG &DAG) const {
6467   SDLoc dl(N);
6468   EVT VT = N->getValueType(0);
6469   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6470   SDValue Op = N->getOperand(0);
6471 
6472   // Only expand vector types if we have the appropriate vector operations.
6473   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
6474                         !isOperationLegalOrCustom(ISD::ADD, VT) ||
6475                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6476     return false;
6477 
6478   SDValue Shift =
6479       DAG.getNode(ISD::SRA, dl, VT, Op,
6480                   DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
6481   SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
6482   Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
6483   return true;
6484 }
6485 
6486 SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
6487                                             SelectionDAG &DAG) const {
6488   SDLoc SL(LD);
6489   SDValue Chain = LD->getChain();
6490   SDValue BasePTR = LD->getBasePtr();
6491   EVT SrcVT = LD->getMemoryVT();
6492   ISD::LoadExtType ExtType = LD->getExtensionType();
6493 
6494   unsigned NumElem = SrcVT.getVectorNumElements();
6495 
6496   EVT SrcEltVT = SrcVT.getScalarType();
6497   EVT DstEltVT = LD->getValueType(0).getScalarType();
6498 
6499   unsigned Stride = SrcEltVT.getSizeInBits() / 8;
6500   assert(SrcEltVT.isByteSized());
6501 
6502   SmallVector<SDValue, 8> Vals;
6503   SmallVector<SDValue, 8> LoadChains;
6504 
6505   for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
6506     SDValue ScalarLoad =
6507         DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
6508                        LD->getPointerInfo().getWithOffset(Idx * Stride),
6509                        SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
6510                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
6511 
6512     BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
6513 
6514     Vals.push_back(ScalarLoad.getValue(0));
6515     LoadChains.push_back(ScalarLoad.getValue(1));
6516   }
6517 
6518   SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
6519   SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
6520 
6521   return DAG.getMergeValues({Value, NewChain}, SL);
6522 }
6523 
/// Scalarize a vector store: either pack non-byte-sized elements into a
/// single integer store, or emit one truncating scalar store per element.
/// Returns the chain of the resulting store(s).
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    // Accumulator: OR each truncated element into place at its bit offset.
    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getConstant(Idx, SL, IdxVT));
      // Truncate to the in-memory element width, then zero-extend to the
      // packed integer width so the shift below positions only valid bits.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 lands in the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    // One integer store covers the whole packed vector.
    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getAlignment(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getConstant(Idx, SL, IdxVT));

    SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
        ST->getMemOperand()->getFlags(), ST->getAAInfo());

    Stores.push_back(Store);
  }

  // The scalar stores are independent of each other; tie them together.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
6597 
/// Expand an unaligned load for a target that does not support it.
/// Returns the loaded value and the output chain.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  // Non-integer loads: try a same-width integer load (bitcast back), else
  // copy through an aligned stack slot.
  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
        // scalarizeVectorLoad may return either a MERGE_VALUES node or a
        // two-result node; unpack value and chain accordingly.
        if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
          return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
        return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up so a trailing partial register is copied too.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       MinAlign(LD->getAlignment(), Offset),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Return the loaded value and the chain covering the stack-slot copies.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  unsigned Alignment = LD->getAlignment();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  Endianness decides which half lives at the
  // lower address; the low half is always zero-extended so the OR below does
  // not corrupt the high bits.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  }

  // aggregate the two parts: (Hi << NumBits) | Lo
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
6752 
6753 SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
6754                                              SelectionDAG &DAG) const {
6755   assert(ST->getAddressingMode() == ISD::UNINDEXED &&
6756          "unaligned indexed stores not implemented!");
6757   SDValue Chain = ST->getChain();
6758   SDValue Ptr = ST->getBasePtr();
6759   SDValue Val = ST->getValue();
6760   EVT VT = Val.getValueType();
6761   int Alignment = ST->getAlignment();
6762   auto &MF = DAG.getMachineFunction();
6763   EVT StoreMemVT = ST->getMemoryVT();
6764 
6765   SDLoc dl(ST);
6766   if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
6767     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
6768     if (isTypeLegal(intVT)) {
6769       if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
6770           StoreMemVT.isVector()) {
6771         // Scalarize the store and let the individual components be handled.
6772         SDValue Result = scalarizeVectorStore(ST, DAG);
6773         return Result;
6774       }
6775       // Expand to a bitconvert of the value to the integer type of the
6776       // same size, then a (misaligned) int store.
6777       // FIXME: Does not handle truncating floating point stores!
6778       SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
6779       Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
6780                             Alignment, ST->getMemOperand()->getFlags());
6781       return Result;
6782     }
6783     // Do a (aligned) store to a stack slot, then copy from the stack slot
6784     // to the final destination using (unaligned) integer loads and stores.
6785     MVT RegVT = getRegisterType(
6786         *DAG.getContext(),
6787         EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
6788     EVT PtrVT = Ptr.getValueType();
6789     unsigned StoredBytes = StoreMemVT.getStoreSize();
6790     unsigned RegBytes = RegVT.getSizeInBits() / 8;
6791     unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
6792 
6793     // Make sure the stack slot is also aligned for the register type.
6794     SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
6795     auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6796 
6797     // Perform the original store, only redirected to the stack slot.
6798     SDValue Store = DAG.getTruncStore(
6799         Chain, dl, Val, StackPtr,
6800         MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
6801 
6802     EVT StackPtrVT = StackPtr.getValueType();
6803 
6804     SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
6805     SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
6806     SmallVector<SDValue, 8> Stores;
6807     unsigned Offset = 0;
6808 
6809     // Do all but one copies using the full register width.
6810     for (unsigned i = 1; i < NumRegs; i++) {
6811       // Load one integer register's worth from the stack slot.
6812       SDValue Load = DAG.getLoad(
6813           RegVT, dl, Store, StackPtr,
6814           MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
6815       // Store it to the final location.  Remember the store.
6816       Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
6817                                     ST->getPointerInfo().getWithOffset(Offset),
6818                                     MinAlign(ST->getAlignment(), Offset),
6819                                     ST->getMemOperand()->getFlags()));
6820       // Increment the pointers.
6821       Offset += RegBytes;
6822       StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
6823       Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
6824     }
6825 
6826     // The last store may be partial.  Do a truncating store.  On big-endian
6827     // machines this requires an extending load from the stack slot to ensure
6828     // that the bits are in the right place.
6829     EVT LoadMemVT =
6830         EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
6831 
6832     // Load from the stack slot.
6833     SDValue Load = DAG.getExtLoad(
6834         ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
6835         MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
6836 
6837     Stores.push_back(
6838         DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
6839                           ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
6840                           MinAlign(ST->getAlignment(), Offset),
6841                           ST->getMemOperand()->getFlags(), ST->getAAInfo()));
6842     // The order of the stores doesn't matter - say it with a TokenFactor.
6843     SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
6844     return Result;
6845   }
6846 
6847   assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
6848          "Unaligned store of unknown type.");
6849   // Get the half-size VT
6850   EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
6851   int NumBits = NewStoredVT.getSizeInBits();
6852   int IncrementSize = NumBits / 8;
6853 
6854   // Divide the stored value in two parts.
6855   SDValue ShiftAmount = DAG.getConstant(
6856       NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
6857   SDValue Lo = Val;
6858   SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
6859 
6860   // Store the two parts
6861   SDValue Store1, Store2;
6862   Store1 = DAG.getTruncStore(Chain, dl,
6863                              DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
6864                              Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
6865                              ST->getMemOperand()->getFlags());
6866 
6867   Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
6868   Alignment = MinAlign(Alignment, IncrementSize);
6869   Store2 = DAG.getTruncStore(
6870       Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
6871       ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
6872       ST->getMemOperand()->getFlags(), ST->getAAInfo());
6873 
6874   SDValue Result =
6875       DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
6876   return Result;
6877 }
6878 
6879 SDValue
6880 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
6881                                        const SDLoc &DL, EVT DataVT,
6882                                        SelectionDAG &DAG,
6883                                        bool IsCompressedMemory) const {
6884   SDValue Increment;
6885   EVT AddrVT = Addr.getValueType();
6886   EVT MaskVT = Mask.getValueType();
6887   assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
6888          "Incompatible types of Data and Mask");
6889   if (IsCompressedMemory) {
6890     // Incrementing the pointer according to number of '1's in the mask.
6891     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
6892     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
6893     if (MaskIntVT.getSizeInBits() < 32) {
6894       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
6895       MaskIntVT = MVT::i32;
6896     }
6897 
6898     // Count '1's with POPCNT.
6899     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
6900     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
6901     // Scale is an element size in bytes.
6902     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
6903                                     AddrVT);
6904     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
6905   } else
6906     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
6907 
6908   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
6909 }
6910 
6911 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
6912                                        SDValue Idx,
6913                                        EVT VecVT,
6914                                        const SDLoc &dl) {
6915   if (isa<ConstantSDNode>(Idx))
6916     return Idx;
6917 
6918   EVT IdxVT = Idx.getValueType();
6919   unsigned NElts = VecVT.getVectorNumElements();
6920   if (isPowerOf2_32(NElts)) {
6921     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
6922                                      Log2_32(NElts));
6923     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
6924                        DAG.getConstant(Imm, dl, IdxVT));
6925   }
6926 
6927   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
6928                      DAG.getConstant(NElts - 1, dl, IdxVT));
6929 }
6930 
6931 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
6932                                                 SDValue VecPtr, EVT VecVT,
6933                                                 SDValue Index) const {
6934   SDLoc dl(Index);
6935   // Make sure the index type is big enough to compute in.
6936   Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
6937 
6938   EVT EltVT = VecVT.getVectorElementType();
6939 
6940   // Calculate the element offset and add it to the pointer.
6941   unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
6942   assert(EltSize * 8 == EltVT.getSizeInBits() &&
6943          "Converting bits to bytes lost precision");
6944 
6945   Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
6946 
6947   EVT IdxVT = Index.getValueType();
6948 
6949   Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
6950                       DAG.getConstant(EltSize, dl, IdxVT));
6951   return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
6952 }
6953 
6954 //===----------------------------------------------------------------------===//
6955 // Implementation of Emulated TLS Model
6956 //===----------------------------------------------------------------------===//
6957 
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  // Build the single argument: the address of the "__emutls_v.<name>"
  // control variable, which must already exist in the module.
  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Lower the runtime call; the call's chain starts at the entry node.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At least for X86 targets; maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  // Emulated TLS cannot fold a nonzero offset into the call.
  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
6994 
6995 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
6996                                                 SelectionDAG &DAG) const {
6997   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
6998   if (!isCtlzFast())
6999     return SDValue();
7000   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7001   SDLoc dl(Op);
7002   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7003     if (C->isNullValue() && CC == ISD::SETEQ) {
7004       EVT VT = Op.getOperand(0).getValueType();
7005       SDValue Zext = Op.getOperand(0);
7006       if (VT.bitsLT(MVT::i32)) {
7007         VT = MVT::i32;
7008         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
7009       }
7010       unsigned Log2b = Log2_32(VT.getSizeInBits());
7011       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
7012       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
7013                                 DAG.getConstant(Log2b, dl, MVT::i32));
7014       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
7015     }
7016   }
7017   return SDValue();
7018 }
7019 
/// Expand [US](ADD|SUB)SAT into either min/max-based forms or an
/// overflow-flag-producing op followed by a select of the saturated value.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Otherwise expand via the matching overflow-producing opcode.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
                               LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Overflow is already an all-ones/zero mask; OR it in directly.
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  } else if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Overflow is already an all-ones/zero mask; AND with its inverse.
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  } else {
    // Signed saturation: on overflow, the wrapped result's sign tells which
    // bound was crossed.
    // SatMax -> Overflow && SumDiff < 0
    // SatMin -> Overflow && SumDiff >= 0
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
    SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
    SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
    SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
    Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
    return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
  }
}
7099 
// Expand [US]MULFIX[SAT] into a double-width multiply (via [SU]MUL_LOHI or
// MULH[SU] + MUL) followed by a funnel shift that discards the extra Scale
// fraction bits, plus clamping selects for the saturating forms. Returns
// SDValue() for vector types that cannot be expanded here so the caller can
// unroll instead.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  // Operand 2 is the (constant) number of fractional bits.
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // smul.fix.sat(a, b, 0) -> smulo(a, b), clamped on overflow.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // On overflow clamp to SatMax when the (wrapped) low product is
      // negative, SatMin otherwise; without overflow use the product itself.
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // umul.fix.sat(a, b, 0) -> umulo(a, b), clamped to all-ones on
      // overflow.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
    // Otherwise fall through to the generic expansion below, which also
    // handles Scale == 0.
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    // Let the caller unroll the vector operation.
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With no fractional bits the entire high half must match the sign bit of
    // the low half, otherwise the product overflowed.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale == 0 above, so all the bits to examine are in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
  // which is the same as if (Hi < (-1 << (Scale - 1)))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
7239 
7240 void TargetLowering::expandUADDSUBO(
7241     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
7242   SDLoc dl(Node);
7243   SDValue LHS = Node->getOperand(0);
7244   SDValue RHS = Node->getOperand(1);
7245   bool IsAdd = Node->getOpcode() == ISD::UADDO;
7246 
7247   // If ADD/SUBCARRY is legal, use that instead.
7248   unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
7249   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
7250     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
7251     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
7252                                     { LHS, RHS, CarryIn });
7253     Result = SDValue(NodeCarry.getNode(), 0);
7254     Overflow = SDValue(NodeCarry.getNode(), 1);
7255     return;
7256   }
7257 
7258   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
7259                             LHS.getValueType(), LHS, RHS);
7260 
7261   EVT ResultType = Node->getValueType(1);
7262   EVT SetCCType = getSetCCResultType(
7263       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
7264   ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
7265   SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
7266   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
7267 }
7268 
7269 void TargetLowering::expandSADDSUBO(
7270     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
7271   SDLoc dl(Node);
7272   SDValue LHS = Node->getOperand(0);
7273   SDValue RHS = Node->getOperand(1);
7274   bool IsAdd = Node->getOpcode() == ISD::SADDO;
7275 
7276   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
7277                             LHS.getValueType(), LHS, RHS);
7278 
7279   EVT ResultType = Node->getValueType(1);
7280   EVT OType = getSetCCResultType(
7281       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
7282 
7283   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
7284   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
7285   if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
7286     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
7287     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
7288     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
7289     return;
7290   }
7291 
7292   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
7293 
7294   // For an addition, the result should be less than one of the operands (LHS)
7295   // if and only if the other operand (RHS) is negative, otherwise there will
7296   // be overflow.
7297   // For a subtraction, the result should be less than one of the operands
7298   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
7299   // otherwise there will be overflow.
7300   SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
7301   SDValue ConditionRHS =
7302       DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
7303 
7304   Overflow = DAG.getBoolExtOrTrunc(
7305       DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
7306       ResultType, ResultType);
7307 }
7308 
// Expand [SU]MULO into a multiply that also yields the high half of the
// double-width product (via MULH[SU], [SU]MUL_LOHI, a widened MUL, or a
// runtime libcall), then derive the overflow flag from that high half.
// Returns false when no expansion is possible (vector type with none of the
// wide multiply forms available); otherwise sets Result/Overflow and returns
// true.
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting back does not recover the original value.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // Type with twice the element width, used by the widened-MUL and libcall
  // strategies below.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                              VT.getVectorNumElements());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Per-signedness opcodes: { high-half multiply, combined lo/hi multiply,
  // extension to WideVT }, indexed by isSigned.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Multiply in the wide type and split the product into halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of low
      // part.
      unsigned LoSize = VT.getSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
        HiLHS = DAG.getConstant(0, dl, VT);
        HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus has to be split to
    // two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half differs from the sign-extension of
    // the bottom half.
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff the top half is non-zero.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
7452 
7453 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
7454   SDLoc dl(Node);
7455   bool NoNaN = Node->getFlags().hasNoNaNs();
7456   unsigned BaseOpcode = 0;
7457   switch (Node->getOpcode()) {
7458   default: llvm_unreachable("Expected VECREDUCE opcode");
7459   case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
7460   case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
7461   case ISD::VECREDUCE_ADD:  BaseOpcode = ISD::ADD; break;
7462   case ISD::VECREDUCE_MUL:  BaseOpcode = ISD::MUL; break;
7463   case ISD::VECREDUCE_AND:  BaseOpcode = ISD::AND; break;
7464   case ISD::VECREDUCE_OR:   BaseOpcode = ISD::OR; break;
7465   case ISD::VECREDUCE_XOR:  BaseOpcode = ISD::XOR; break;
7466   case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
7467   case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
7468   case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
7469   case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
7470   case ISD::VECREDUCE_FMAX:
7471     BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
7472     break;
7473   case ISD::VECREDUCE_FMIN:
7474     BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
7475     break;
7476   }
7477 
7478   SDValue Op = Node->getOperand(0);
7479   EVT VT = Op.getValueType();
7480 
7481   // Try to use a shuffle reduction for power of two vectors.
7482   if (VT.isPow2VectorType()) {
7483     while (VT.getVectorNumElements() > 1) {
7484       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
7485       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
7486         break;
7487 
7488       SDValue Lo, Hi;
7489       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
7490       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
7491       VT = HalfVT;
7492     }
7493   }
7494 
7495   EVT EltVT = VT.getVectorElementType();
7496   unsigned NumElts = VT.getVectorNumElements();
7497 
7498   SmallVector<SDValue, 8> Ops;
7499   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
7500 
7501   SDValue Res = Ops[0];
7502   for (unsigned i = 1; i < NumElts; i++)
7503     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
7504 
7505   // Result type may be wider than element type.
7506   if (EltVT != Node->getValueType(0))
7507     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
7508   return Res;
7509 }
7510