1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallBitVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/MemoryLocation.h"
34 #include "llvm/CodeGen/DAGCombine.h"
35 #include "llvm/CodeGen/ISDOpcodes.h"
36 #include "llvm/CodeGen/MachineFrameInfo.h"
37 #include "llvm/CodeGen/MachineFunction.h"
38 #include "llvm/CodeGen/MachineMemOperand.h"
39 #include "llvm/CodeGen/RuntimeLibcalls.h"
40 #include "llvm/CodeGen/SelectionDAG.h"
41 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
42 #include "llvm/CodeGen/SelectionDAGNodes.h"
43 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
44 #include "llvm/CodeGen/TargetLowering.h"
45 #include "llvm/CodeGen/TargetRegisterInfo.h"
46 #include "llvm/CodeGen/TargetSubtargetInfo.h"
47 #include "llvm/CodeGen/ValueTypes.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/KnownBits.h"
62 #include "llvm/Support/MachineValueType.h"
63 #include "llvm/Support/MathExtras.h"
64 #include "llvm/Support/raw_ostream.h"
65 #include "llvm/Target/TargetMachine.h"
66 #include "llvm/Target/TargetOptions.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 
using namespace llvm;

#define DEBUG_TYPE "dagcombine"

// Pass-wide statistics, reported with -stats.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");

// Hidden (developer) command-line knobs controlling the combiner's use of
// alias analysis and its load-related transformations.

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
// Debug-build-only filter: restrict combiner alias analysis to one function.
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));
112 
113 namespace {
114 
  class DAGCombiner {
    /// The DAG this combiner is operating on.
    SelectionDAG &DAG;

    /// Target hooks, queried for legality of types/operations and for
    /// target-specific combines.
    const TargetLowering &TLI;

    /// Current combine phase; initialized to BeforeLegalizeTypes in the
    /// constructor.
    CombineLevel Level;

    /// Codegen optimization level this combiner was created with.
    CodeGenOpt::Level OptLevel;

    /// Legalization state flags; both start false (i.e. the state before any
    /// legalization has run).
    bool LegalOperations = false;
    bool LegalTypes = false;

    /// True when the function being compiled is marked for size optimization
    /// (set from Function::optForSize in the constructor).
    bool ForCodeSize;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// Construct a combiner over \p D at the initial (pre-type-legalization)
    /// level, and precompute the widest legal store width for this target
    /// (see MaximumLegalStoreInBits).
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();

      MaximumLegalStoreInBits = 0;
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits();
    }

    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      // Only push if N was not already present; the map's value records the
      // node's (stable) index into Worklist.
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    /// Remove \p N from the worklist, queue operands that may have become
    /// dead for another visit, and delete \p N from the DAG.
    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    /// Widest store width (in bits) that is legal for this target; computed
    /// once in the constructor by scanning all simple value types.
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt Demanded = APInt::getAllOnesValue(NumElts);
      return SimplifyDemandedVectorElts(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitUADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitUSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

    /// Helper function for MergeConsecutiveStores which merges the
    /// component store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for MergeConsecutiveStores. When the
    /// source elements of the consecutive stores are all constants or
    /// all extracted vector elements, try to merge them into one
    /// larger store introducing bitcasts if necessary.  \return True
    /// if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for MergeConsecutiveStores. Stores
    /// that potentially may be merged with St are placed in
    /// StoreNodes. RootNode is a chain predecessor to all store
    /// candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// candidate stores have indirect dependency through their
    /// operands. RootNode is the predecessor to all stores calculated
    /// by getStoreMergeCandidates and is used to prune the dependency check.
    /// \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return number of stores that were merged into a merged store (the
    /// affected nodes are stored as a prefix in \p StoreNodes).
    bool MergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

    /// Helper function to determine whether the target supports operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      if (LegalOperations)
        return TLI.isOperationLegal(Opcode, VT);
      return TLI.isOperationLegalOrCustom(Opcode, VT);
    }

  public:
    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };
610 
611 /// This class is a DAGUpdateListener that removes any deleted
612 /// nodes from the worklist.
613 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
614   DAGCombiner &DC;
615 
616 public:
617   explicit WorklistRemover(DAGCombiner &dc)
618     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
619 
620   void NodeDeleted(SDNode *N, SDNode *E) override {
621     DC.removeFromWorklist(N);
622   }
623 };
624 
625 } // end anonymous namespace
626 
627 //===----------------------------------------------------------------------===//
628 //  TargetLowering::DAGCombinerInfo implementation
629 //===----------------------------------------------------------------------===//
630 
631 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
632   ((DAGCombiner*)DC)->AddToWorklist(N);
633 }
634 
635 SDValue TargetLowering::DAGCombinerInfo::
636 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
637   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
638 }
639 
640 SDValue TargetLowering::DAGCombinerInfo::
641 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
642   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
643 }
644 
645 SDValue TargetLowering::DAGCombinerInfo::
646 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
647   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
648 }
649 
650 void TargetLowering::DAGCombinerInfo::
651 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
652   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
653 }
654 
655 //===----------------------------------------------------------------------===//
656 // Helper Functions
657 //===----------------------------------------------------------------------===//
658 
659 void DAGCombiner::deleteAndRecombine(SDNode *N) {
660   removeFromWorklist(N);
661 
662   // If the operands of this node are only used by the node, they will now be
663   // dead. Make sure to re-visit them and recursively delete dead nodes.
664   for (const SDValue &Op : N->ops())
665     // For an operand generating multiple values, one of the values may
666     // become dead allowing further simplification (e.g. split index
667     // arithmetic from an indexed load).
668     if (Op->hasOneUse() || Op->getNumValues() > 1)
669       AddToWorklist(Op.getNode());
670 
671   DAG.DeleteNode(N);
672 }
673 
674 /// Return 1 if we can compute the negated form of the specified expression for
675 /// the same cost as the expression itself, or 2 if we can compute the negated
676 /// form more cheaply than the expression itself.
677 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
678                                const TargetLowering &TLI,
679                                const TargetOptions *Options,
680                                unsigned Depth = 0) {
681   // fneg is removable even if it has multiple uses.
682   if (Op.getOpcode() == ISD::FNEG) return 2;
683 
684   // Don't allow anything with multiple uses unless we know it is free.
685   EVT VT = Op.getValueType();
686   const SDNodeFlags Flags = Op->getFlags();
687   if (!Op.hasOneUse())
688     if (!(Op.getOpcode() == ISD::FP_EXTEND &&
689           TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
690       return 0;
691 
692   // Don't recurse exponentially.
693   if (Depth > 6) return 0;
694 
695   switch (Op.getOpcode()) {
696   default: return false;
697   case ISD::ConstantFP: {
698     if (!LegalOperations)
699       return 1;
700 
701     // Don't invert constant FP values after legalization unless the target says
702     // the negated constant is legal.
703     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
704       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
705   }
706   case ISD::FADD:
707     if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
708       return 0;
709 
710     // After operation legalization, it might not be legal to create new FSUBs.
711     if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
712       return 0;
713 
714     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
715     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
716                                     Options, Depth + 1))
717       return V;
718     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
719     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
720                               Depth + 1);
721   case ISD::FSUB:
722     // We can't turn -(A-B) into B-A when we honor signed zeros.
723     if (!Options->NoSignedZerosFPMath &&
724         !Flags.hasNoSignedZeros())
725       return 0;
726 
727     // fold (fneg (fsub A, B)) -> (fsub B, A)
728     return 1;
729 
730   case ISD::FMUL:
731   case ISD::FDIV:
732     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
733     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
734                                     Options, Depth + 1))
735       return V;
736 
737     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
738                               Depth + 1);
739 
740   case ISD::FP_EXTEND:
741   case ISD::FP_ROUND:
742   case ISD::FSIN:
743     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
744                               Depth + 1);
745   }
746 }
747 
/// If isNegatibleForFree returns true, return the newly negated expression.
/// The switch below must accept exactly the opcodes isNegatibleForFree
/// accepts (the default case is llvm_unreachable), and the Depth threshold in
/// the assert must match the recursion bound used there.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  // Preserve the fast-math flags of the node being negated on the new nodes.
  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    // Negate the constant directly.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    // Prefer negating whichever operand isNegatibleForFree says is free,
    // mirroring the operand order it tried.
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Push the negation through the unary operation.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
      // FP_ROUND carries a second (truncation-flag) operand that must be kept.
      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));
  }
}
819 
820 // APInts must be the same size for most operations, this helper
821 // function zero extends the shorter of the pair so that they match.
822 // We provide an Offset so that we can create bitwidths that won't overflow.
823 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
824   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
825   LHS = LHS.zextOrSelf(Bits);
826   RHS = RHS.zextOrSelf(Bits);
827 }
828 
829 // Return true if this node is a setcc, or is a select_cc
830 // that selects between the target values used for true and false, making it
831 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
832 // the appropriate nodes based on the type of node we are checking. This
833 // simplifies life a bit for the callers.
834 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
835                                     SDValue &CC) const {
836   if (N.getOpcode() == ISD::SETCC) {
837     LHS = N.getOperand(0);
838     RHS = N.getOperand(1);
839     CC  = N.getOperand(2);
840     return true;
841   }
842 
843   if (N.getOpcode() != ISD::SELECT_CC ||
844       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
845       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
846     return false;
847 
848   if (TLI.getBooleanContents(N.getValueType()) ==
849       TargetLowering::UndefinedBooleanContent)
850     return false;
851 
852   LHS = N.getOperand(0);
853   RHS = N.getOperand(1);
854   CC  = N.getOperand(4);
855   return true;
856 }
857 
858 /// Return true if this is a SetCC-equivalent operation with only one use.
859 /// If this is true, it allows the users to invert the operation for free when
860 /// it is profitable to do so.
861 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
862   SDValue N0, N1, N2;
863   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
864     return true;
865   return false;
866 }
867 
868 static SDValue peekThroughBitcast(SDValue V) {
869   while (V.getOpcode() == ISD::BITCAST)
870     V = V.getOperand(0);
871   return V;
872 }
873 
874 // Returns the SDNode if it is a constant float BuildVector
875 // or constant float.
876 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
877   if (isa<ConstantFPSDNode>(N))
878     return N.getNode();
879   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
880     return N.getNode();
881   return nullptr;
882 }
883 
884 // Determines if it is a constant integer or a build vector of constant
885 // integers (and undefs).
886 // Do not permit build vector implicit truncation.
887 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
888   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
889     return !(Const->isOpaque() && NoOpaques);
890   if (N.getOpcode() != ISD::BUILD_VECTOR)
891     return false;
892   unsigned BitWidth = N.getScalarValueSizeInBits();
893   for (const SDValue &Op : N->op_values()) {
894     if (Op.isUndef())
895       continue;
896     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
897     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
898         (Const->isOpaque() && NoOpaques))
899       return false;
900   }
901   return true;
902 }
903 
904 // Determines if it is a constant null integer or a splatted vector of a
905 // constant null integer (with no undefs).
906 // Build vector implicit truncation is not an issue for null values.
907 static bool isNullConstantOrNullSplatConstant(SDValue N) {
908   // TODO: may want to use peekThroughBitcast() here.
909   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
910     return Splat->isNullValue();
911   return false;
912 }
913 
914 // Determines if it is a constant integer of one or a splatted vector of a
915 // constant integer of one (with no undefs).
916 // Do not permit build vector implicit truncation.
917 static bool isOneConstantOrOneSplatConstant(SDValue N) {
918   // TODO: may want to use peekThroughBitcast() here.
919   unsigned BitWidth = N.getScalarValueSizeInBits();
920   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
921     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
922   return false;
923 }
924 
925 // Determines if it is a constant integer of all ones or a splatted vector of a
926 // constant integer of all ones (with no undefs).
927 // Do not permit build vector implicit truncation.
928 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
929   N = peekThroughBitcast(N);
930   unsigned BitWidth = N.getScalarValueSizeInBits();
931   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
932     return Splat->isAllOnesValue() &&
933            Splat->getAPIntValue().getBitWidth() == BitWidth;
934   return false;
935 }
936 
937 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
938 // undef's.
939 static bool isAnyConstantBuildVector(const SDNode *N) {
940   return ISD::isBuildVectorOfConstantSDNodes(N) ||
941          ISD::isBuildVectorOfConstantFPSDNodes(N);
942 }
943 
/// Try to reassociate the commutative/associative operation \p Opc over
/// (N0, N1) so that constants fold together or float to the outermost
/// position:
///   (op (op x, c1), c2) -> (op x, (op c1, c2))
///   (op (op x, c1), y)  -> (op (op x, y), c1)  [requires one use of inner op]
/// plus the mirrored forms with N0/N1 swapped. Returns the replacement value,
/// or a null SDValue if no reassociation was performed.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();

  EVT VT = N0.getValueType();
  // Form 1: the inner op is on the left (N0).
  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        // Both operands were constant but didn't fold; give up rather than
        // rebuild an equivalent expression.
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  // Form 2: mirrored — the inner op is on the right (N1).
  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}
993 
/// Replace all NumTo result values of node \p N with the values in \p To,
/// optionally (AddTo) pushing the replacements and their users onto the
/// worklist, and delete \p N if it becomes dead. Returns SDValue(N, 0) so
/// visit routines can simply `return CombineTo(...)`.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  // Sanity check: each replacement value must preserve its result's type.
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // RAUW may delete nodes; the remover keeps the worklist consistent.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1025 
/// Apply a simplification recorded by TargetLowering (TLO.Old -> TLO.New):
/// replace all uses, requeue the new value and its users, and delete the old
/// node if it became dead.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
1043 
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
/// On success the replacement has already been committed to the DAG.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  // TLI records any Old->New replacement inside TLO.
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
1064 
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true. On success the replacement has already been committed
/// to the DAG. \p AssumeSingleUse is forwarded to TLI to relax the
/// multiple-use restriction.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  // TLI records any Old->New replacement inside TLO.
  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
                                      0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
1088 
/// Replace \p Load (and its chain result) with the wider \p ExtLoad: uses of
/// the loaded value get a TRUNCATE of ExtLoad back to the original type, and
/// uses of the chain get ExtLoad's chain. The original load is then deleted.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Value 0 is the loaded result; value 1 is the chain.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
1102 
/// Produce a copy of \p Op whose result type is the wider \p PVT, for use as
/// an operand of a promoted operation. Sets \p Replace to true when the
/// returned node is an extending load that should replace the original load
/// via ReplaceLoadWithPromotedLoad (the caller is responsible for doing so).
/// Returns a null SDValue when no suitable extension exists.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load is re-emitted as EXTLOAD of its memory type;
    // extending loads keep their original extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Keep the assertion on the promoted value.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Generic fallback: any-extend, if the target supports it at this type.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1139 
/// Promote \p Op to type \p PVT and re-establish its signed value with
/// SIGN_EXTEND_INREG of the original type. Returns a null SDValue when the
/// target cannot do SIGN_EXTEND_INREG at PVT or the operand cannot be
/// promoted.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // PromoteOperand asked us to replace a load it widened.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1156 
/// Promote \p Op to type \p PVT and re-establish its unsigned value by
/// zero-extending "in register" from the original type. NOTE: unlike
/// SExtPromoteOperand, no operation-legality check is performed up front
/// here.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // PromoteOperand asked us to replace a load it widened.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
1170 
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
/// Builds trunc(op(promote(N0), promote(N1))), replaces Op with it via
/// CombineTo, and returns Op on success or a null SDValue otherwise.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    // Commit the load replacements requested by PromoteOperand.
    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}
1235 
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
/// Only the value operand is promoted (sign-extended for SRA, zero-extended
/// for SRL, any-extended otherwise); the shift-amount operand is reused
/// unchanged.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // Right shifts must preserve the high bits' meaning; left shifts and
    // others may use any extension.
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1288 
1289 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1290   if (!LegalOperations)
1291     return SDValue();
1292 
1293   EVT VT = Op.getValueType();
1294   if (VT.isVector() || !VT.isInteger())
1295     return SDValue();
1296 
1297   // If operation type is 'undesirable', e.g. i16 on x86, consider
1298   // promoting it.
1299   unsigned Opc = Op.getOpcode();
1300   if (TLI.isTypeDesirableForOp(Opc, VT))
1301     return SDValue();
1302 
1303   EVT PVT = VT;
1304   // Consult target whether it is a good idea to promote this operation and
1305   // what's the right type to promote it to.
1306   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1307     assert(PVT != VT && "Don't know what type to promote to!");
1308     // fold (aext (aext x)) -> (aext x)
1309     // fold (aext (zext x)) -> (zext x)
1310     // fold (aext (sext x)) -> (sext x)
1311     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1312     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1313   }
1314   return SDValue();
1315 }
1316 
/// Promote an unindexed integer load to a wider, target-preferred type by
/// re-emitting it as an extending load of the original memory type and
/// truncating the result back. Returns true if the replacement was done.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes EXTLOAD of its memory type; an extending
    // load keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Value 0 is the loaded result; value 1 is the chain.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1362 
/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  // Only the root being dead triggers any deletion at all.
  if (!N->use_empty())
    return false;

  // Worklist of candidates; the SetVector de-duplicates shared operands.
  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      // Dead: queue its operands (they may have just lost their last user),
      // then remove it from the combiner worklist and the DAG.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still live: it lost a user, so revisit it for further combines.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}
1392 
1393 //===----------------------------------------------------------------------===//
1394 //  Main DAG Combiner implementation
1395 //===----------------------------------------------------------------------===//
1396 
/// Top-level combiner driver: seed the worklist with every node in the DAG,
/// then repeatedly pop nodes, (re-)legalize them if running post-legalization,
/// try to combine them, and propagate replacements until the worklist drains.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Revisit everything legalization touched.
      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // Legalization replaced N itself; nothing more to do with it.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    // Replace N by RV: node-for-node when the value counts line up, otherwise
    // only a single-result node may be replaced by a value.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1500 
/// Dispatch \p N to the visit function for its opcode.
///
/// Returns the replacement value computed by the opcode-specific visitor, or
/// an empty SDValue when no combine applied (including for opcodes that have
/// no visitor at all). Opcodes sharing a case label share one visitor that
/// inspects the opcode itself.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  // Signed and unsigned remainder share a visitor.
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  // All four integer min/max flavors share a visitor.
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  // Both rotate directions share a visitor.
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  // Sign/zero assertions share a visitor.
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1606 
1607 SDValue DAGCombiner::combine(SDNode *N) {
1608   SDValue RV = visit(N);
1609 
1610   // If nothing happened, try a target-specific DAG combine.
1611   if (!RV.getNode()) {
1612     assert(N->getOpcode() != ISD::DELETED_NODE &&
1613            "Node was deleted but visit returned NULL!");
1614 
1615     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1616         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1617 
1618       // Expose the DAG combiner to the target combiner impls.
1619       TargetLowering::DAGCombinerInfo
1620         DagCombineInfo(DAG, Level, false, this);
1621 
1622       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1623     }
1624   }
1625 
1626   // If nothing happened still, try promoting the operation.
1627   if (!RV.getNode()) {
1628     switch (N->getOpcode()) {
1629     default: break;
1630     case ISD::ADD:
1631     case ISD::SUB:
1632     case ISD::MUL:
1633     case ISD::AND:
1634     case ISD::OR:
1635     case ISD::XOR:
1636       RV = PromoteIntBinOp(SDValue(N, 0));
1637       break;
1638     case ISD::SHL:
1639     case ISD::SRA:
1640     case ISD::SRL:
1641       RV = PromoteIntShiftOp(SDValue(N, 0));
1642       break;
1643     case ISD::SIGN_EXTEND:
1644     case ISD::ZERO_EXTEND:
1645     case ISD::ANY_EXTEND:
1646       RV = PromoteExtend(SDValue(N, 0));
1647       break;
1648     case ISD::LOAD:
1649       if (PromoteLoad(SDValue(N, 0)))
1650         RV = SDValue(N, 0);
1651       break;
1652     }
1653   }
1654 
1655   // If N is a commutative binary node, try eliminate it if the commuted
1656   // version is already present in the DAG.
1657   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1658       N->getNumValues() == 1) {
1659     SDValue N0 = N->getOperand(0);
1660     SDValue N1 = N->getOperand(1);
1661 
1662     // Constant operands are canonicalized to RHS.
1663     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1664       SDValue Ops[] = {N1, N0};
1665       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1666                                             N->getFlags());
1667       if (CSENode)
1668         return SDValue(CSENode, 0);
1669     }
1670   }
1671 
1672   return RV;
1673 }
1674 
1675 /// Given a node, return its input chain if it has one, otherwise return a null
1676 /// sd operand.
1677 static SDValue getInputChainForNode(SDNode *N) {
1678   if (unsigned NumOps = N->getNumOperands()) {
1679     if (N->getOperand(0).getValueType() == MVT::Other)
1680       return N->getOperand(0);
1681     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1682       return N->getOperand(NumOps-1);
1683     for (unsigned i = 1; i < NumOps-1; ++i)
1684       if (N->getOperand(i).getValueType() == MVT::Other)
1685         return N->getOperand(i);
1686   }
1687   return SDValue();
1688 }
1689 
/// Combine a TokenFactor node: drop redundant operands (entry tokens,
/// duplicates), merge in single-use nested token factors, and prune operands
/// that are transitively reachable through another operand's chain.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps; // Dedups Ops; later reused by the pruner.
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains; // Chain nodes already visited/queued.
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // NOTE: this lambda shadows DAGCombiner::AddToWorklist for the rest of the
  // function; it pushes chain nodes onto the local pruning Worklist above,
  // not onto the combiner's node worklist.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      // Merge the pruned operand's outstanding work into the current one.
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Cap the breadth-first search at 1024 visited nodes to bound compile time
  // on pathological chain graphs.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops for any pruning to be possible.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting
      // another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // Copy nodes carry their chain as operand 0.
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      // Memory operations expose their chain via MemSDNode::getChain().
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only operands never reached through another operand's chain.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1851 
/// MERGE_VALUES can always be eliminated: each of its results is just
/// forwarded to the corresponding operand.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  // Keep N's operands off the combiner worklist while we delete it
  // (WorklistRemover unregisters nodes as they are removed from the DAG).
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Forward result i of the MERGE_VALUES to its operand i.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}
1868 
1869 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1870 /// ConstantSDNode pointer else nullptr.
1871 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1872   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1873   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1874 }
1875 
/// Try to pull a binop's constant operand through a one-use select-of-constants
/// operand, replacing the binop with a select of the folded arms:
///   binop (select Cond, CT, CF), CBO --> select Cond, (binop CT, CBO),
///                                                     (binop CF, CBO)
/// Returns the new select, or an empty SDValue if the fold does not apply.
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  auto BinOpcode = BO->getOpcode();
  assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
          BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
          BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
          BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
          BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
          BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
          BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
          BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
          BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
         "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  // SelOpNo records which operand of BO is the select (0 or 1).
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  // Both select arms must be (integer or FP) constants.
  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
                         (isNullConstantOrNullSplatConstant(CT) ||
                          isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
                         (isNullConstantOrNullSplatConstant(CF) ||
                          isAllOnesConstantOrAllOnesSplatConstant(CF));

  // CBO is BO's other (non-select) operand.
  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // In case of shift value and shift amount may have different VT. For instance
  // on x86 shift amount is i8 regardless of LHS type. Bail out if we have
  // swapped operands and value types do not match. NB: x86 is fine if operands
  // are not swapped with shift amount VT being not bigger than shifted value.
  // TODO: that is possible to check for a shift operation, correct VTs and
  // still perform optimization on x86 if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  // Operand order must match the original binop when the select was the RHS.
  SDLoc DL(Sel);
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
}
1960 
/// Fold an add/sub of a zero-extended inverted-low-bit compare with a constant
/// into a sub/add of the low bit itself with an adjusted constant:
///   add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
///   sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
/// Returns the rewritten node, or an empty SDValue if the pattern is absent.
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // Match a constant operand and a zext operand for the math instruction:
  // add Z, C
  // sub C, Z
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
  auto *CN = dyn_cast<ConstantSDNode>(C);
  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  // Match the zext operand as a setcc of a boolean.
  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
      Z.getOperand(0).getValueType() != MVT::i1)
    return SDValue();

  // Match the compare as: setcc (X & 1), 0, eq.
  SDValue SetCC = Z.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
      SetCC.getOperand(0).getOpcode() != ISD::AND ||
      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
    return SDValue();

  // We are adding/subtracting a constant and an inverted low bit. Turn that
  // into a subtract/add of the low bit with incremented/decremented constant:
  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
  EVT VT = C.getValueType();
  SDLoc DL(N);
  // LowBit is (X & 1) widened/narrowed to the result type.
  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}
1999 
2000 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2001 /// a shift and add with a different constant.
2002 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2003   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2004          "Expecting add or sub");
2005 
2006   // We need a constant operand for the add/sub, and the other operand is a
2007   // logical shift right: add (srl), C or sub C, (srl).
2008   bool IsAdd = N->getOpcode() == ISD::ADD;
2009   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2010   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2011   ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2012   if (!C || ShiftOp.getOpcode() != ISD::SRL)
2013     return SDValue();
2014 
2015   // The shift must be of a 'not' value.
2016   // TODO: Use isBitwiseNot() if it works with vectors.
2017   SDValue Not = ShiftOp.getOperand(0);
2018   if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR ||
2019       !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1)))
2020     return SDValue();
2021 
2022   // The shift must be moving the sign bit to the least-significant-bit.
2023   EVT VT = ShiftOp.getValueType();
2024   SDValue ShAmt = ShiftOp.getOperand(1);
2025   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2026   if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2027     return SDValue();
2028 
2029   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2030   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2031   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2032   SDLoc DL(N);
2033   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2034   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2035   APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2036   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2037 }
2038 
/// Combine an ADD node: constant folding, canonicalization of constants to
/// the RHS, add/sub identity folds, add->or/sub rewrites, and the shared
/// visitADDLike patterns (tried with operands in both orders).
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneConstantOrOneSplatConstant(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // If some bits of N are known to be ignorable, the operands may simplify;
  // returning N itself signals the caller that N was updated in place.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // fold (add (xor a, -1), 1) -> (sub 0, a)
  if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                       N0.getOperand(0));

  // Patterns shared with other add-like nodes; try both operand orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2197 
2198 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2199   bool Masked = false;
2200 
2201   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2202   while (true) {
2203     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2204       V = V.getOperand(0);
2205       continue;
2206     }
2207 
2208     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2209       Masked = true;
2210       V = V.getOperand(0);
2211       continue;
2212     }
2213 
2214     break;
2215   }
2216 
2217   // If this is not a carry, return.
2218   if (V.getResNo() != 1)
2219     return SDValue();
2220 
2221   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2222       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2223     return SDValue();
2224 
2225   // If the result is masked, then no matter what kind of bool it is we can
2226   // return. If it isn't, then we need to make sure the bool type is either 0 or
2227   // 1 and not other values.
2228   if (Masked ||
2229       TLI.getBooleanContents(V.getValueType()) ==
2230           TargetLoweringBase::ZeroOrOneBooleanContent)
2231     return V;
2232 
2233   return SDValue();
2234 }
2235 
/// Asymmetric ADD folds; the caller invokes this twice, with the operands
/// swapped on the second call, so each pattern only needs to match N1.
/// \p LocReference supplies the debug location for any nodes created.
/// Returns a replacement value, or a null SDValue if no fold applies.
SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NumSignBits == DestBits means every bit of AndOp0 equals its sign bit,
    // i.e. AndOp0 is all-ones or all-zeros, so (and AndOp0, 1) is 0 or 1 and
    // adding it equals subtracting AndOp0 (0 or -1).
    if (NumSignBits == DestBits &&
        isOneConstantOrOneSplatConstant(N1->getOperand(1)))
      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
  }

  // add (sext i1), X -> sub X, (zext i1)
  // (sext i1 is 0/-1; adding it equals subtracting the 0/1 zext.)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  // Only the sum result (ResNo 0) of the addcarry may be reused here.
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
2293 
2294 SDValue DAGCombiner::visitADDC(SDNode *N) {
2295   SDValue N0 = N->getOperand(0);
2296   SDValue N1 = N->getOperand(1);
2297   EVT VT = N0.getValueType();
2298   SDLoc DL(N);
2299 
2300   // If the flag result is dead, turn this into an ADD.
2301   if (!N->hasAnyUseOfValue(1))
2302     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2303                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2304 
2305   // canonicalize constant to RHS.
2306   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2307   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2308   if (N0C && !N1C)
2309     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2310 
2311   // fold (addc x, 0) -> x + no carry out
2312   if (isNullConstant(N1))
2313     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2314                                         DL, MVT::Glue));
2315 
2316   // If it cannot overflow, transform into an add.
2317   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2318     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2319                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2320 
2321   return SDValue();
2322 }
2323 
2324 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2325                            SelectionDAG &DAG, const TargetLowering &TLI) {
2326   SDValue Cst;
2327   switch (TLI.getBooleanContents(VT)) {
2328   case TargetLowering::ZeroOrOneBooleanContent:
2329   case TargetLowering::UndefinedBooleanContent:
2330     Cst = DAG.getConstant(1, DL, VT);
2331     break;
2332   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2333     Cst = DAG.getConstant(-1, DL, VT);
2334     break;
2335   }
2336 
2337   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2338 }
2339 
2340 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2341   if (V.getOpcode() != ISD::XOR) return false;
2342   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
2343   if (!Const) return false;
2344 
2345   switch(TLI.getBooleanContents(VT)) {
2346     case TargetLowering::ZeroOrOneBooleanContent:
2347       return Const->isOne();
2348     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2349       return Const->isAllOnesValue();
2350     case TargetLowering::UndefinedBooleanContent:
2351       return (Const->getAPIntValue() & 0x01) == 1;
2352   }
2353   llvm_unreachable("Unsupported boolean content");
2354 }
2355 
/// Combine a UADDO node: drop a dead overflow flag, canonicalize constants,
/// simplify never-overflowing adds, rewrite (~a + 1) as a negation, and try
/// the asymmetric folds in visitUADDOLike with both operand orders.
SDValue DAGCombiner::visitUADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  // Vector UADDO is not handled here.
  if (VT.isVector())
    return SDValue();

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);

  // fold (uaddo x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getConstant(0, DL, CarryVT));

  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
  // (~a + 1 is the two's-complement negation of a; the carry of the add is
  // the logical inverse of the borrow of the subtract, hence the flip.)
  if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
    SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                              DAG.getConstant(0, DL, VT),
                              N0.getOperand(0));
    return CombineTo(N, Sub,
                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
  }

  // Asymmetric folds, tried with both operand orders.
  if (SDValue Combined = visitUADDOLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitUADDOLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2403 
2404 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2405   auto VT = N0.getValueType();
2406 
2407   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2408   // If Y + 1 cannot overflow.
2409   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2410     SDValue Y = N1.getOperand(0);
2411     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2412     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2413       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2414                          N1.getOperand(2));
2415   }
2416 
2417   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2418   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2419     if (SDValue Carry = getAsCarry(TLI, N1))
2420       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2421                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2422 
2423   return SDValue();
2424 }
2425 
2426 SDValue DAGCombiner::visitADDE(SDNode *N) {
2427   SDValue N0 = N->getOperand(0);
2428   SDValue N1 = N->getOperand(1);
2429   SDValue CarryIn = N->getOperand(2);
2430 
2431   // canonicalize constant to RHS
2432   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2433   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2434   if (N0C && !N1C)
2435     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2436                        N1, N0, CarryIn);
2437 
2438   // fold (adde x, y, false) -> (addc x, y)
2439   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2440     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2441 
2442   return SDValue();
2443 }
2444 
/// Combine an ADDCARRY node: canonicalize constants, simplify a known-false
/// carry-in, materialize the carry when both addends are zero, recognize the
/// negation-via-subcarry pattern, and try the asymmetric folds in
/// visitADDCARRYLike with both operand orders.
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
  }

  EVT CarryVT = CarryIn.getValueType();

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  // The sum is just the incoming carry normalized to 0/1 in the result type.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
  // (~a + !b == -a - b in two's complement; the add's carry-out is the
  // logical inverse of the subtract's borrow-out.)
  if (isBitwiseNot(N0) && isNullConstant(N1) &&
      isBooleanFlip(CarryIn, CarryVT, TLI)) {
    SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
                              DAG.getConstant(0, DL, N0.getValueType()),
                              N0.getOperand(0), CarryIn.getOperand(0));
    return CombineTo(N, Sub,
                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
  }

  // Asymmetric folds, tried with both operand orders.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}
2494 
/// Asymmetric ADDCARRY folds; the caller invokes this twice with the operands
/// swapped, so each pattern only needs to match one operand position.
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  // (For UADDO only the sum result, ResNo 0, may be absorbed this way.)
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   *
   * We are trying to get:
   *   (addcarry X, 0, (addcarry A, B, Z):Carry)
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    /**
     *            (uaddo A, B)
     *             /       \
     *          Carry      Sum
     *            |          \
     *            | (addcarry *, 0, Z)
     *            |       /
     *             \   Carry
     *              |   /
     * (addcarry X, *, *)
     */
    // Match the diamond exactly: CarryIn must be the carry result of an
    // addcarry whose first operand is the sum of the same uaddo that
    // produced Y.
    if (Y.getOpcode() == ISD::UADDO &&
        CarryIn.getResNo() == 1 &&
        CarryIn.getOpcode() == ISD::ADDCARRY &&
        isNullConstant(CarryIn.getOperand(1)) &&
        CarryIn.getOperand(0) == Y.getValue(0)) {
      // Merge the uaddo with the inner carry: (addcarry A, B, Z).
      auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
                              Y.getOperand(0), Y.getOperand(1),
                              CarryIn.getOperand(2));
      AddToWorklist(NewY.getNode());
      // Chain the outer node onto the merged carry.
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), N0.getValueType()),
                         NewY.getValue(1));
    }
  }

  return SDValue();
}
2542 
2543 // Since it may not be valid to emit a fold to zero for vector initializers
2544 // check if we can before folding.
2545 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2546                              SelectionDAG &DAG, bool LegalOperations,
2547                              bool LegalTypes) {
2548   if (!VT.isVector())
2549     return DAG.getConstant(0, DL, VT);
2550   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2551     return DAG.getConstant(0, DL, VT);
2552   return SDValue();
2553 }
2554 
/// Combine a SUB node: constant folding plus a cascade of algebraic
/// identities (x-x, x-c, 0-x, add/sub reassociation, abs recognition,
/// symbol-offset folding, ...). The folds are ordered; each returns as soon
/// as it fires.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Only non-opaque constants are negated below (opaque constants must not
  // be re-materialized as different values).
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // Folds for negation: (sub 0, X).
  if (isNullConstantOrNullSplatConstant(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstantOrAllOnesSplatConstant(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold (A - (0-B)) -> A+B
  if (N1.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold (A-(B-C)) -> A+(C-B)
  // Guarded by hasOneUse so the inner sub is not duplicated.
  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
                                   N1.getOperand(0)));

  // fold (X - (-Y * Z)) -> (X + (Y * Z))
  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
    if (N1.getOperand(0).getOpcode() == ISD::SUB &&
        isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0).getOperand(1),
                                N1.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
    if (N1.getOperand(1).getOpcode() == ISD::SUB &&
        isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0),
                                N1.getOperand(1).getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
  }

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // Shared add/sub folds (see the helpers for the patterns they match).
  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
      SDValue S0 = N1.getOperand(0);
      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  // Prefer an add for more folding potential and possibly better codegen:
  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
    SDValue ShAmt = N1.getOperand(1);
    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
    if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
    }
  }

  return SDValue();
}
2762 
2763 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2764   SDValue N0 = N->getOperand(0);
2765   SDValue N1 = N->getOperand(1);
2766   EVT VT = N0.getValueType();
2767   SDLoc DL(N);
2768 
2769   // If the flag result is dead, turn this into an SUB.
2770   if (!N->hasAnyUseOfValue(1))
2771     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2772                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2773 
2774   // fold (subc x, x) -> 0 + no borrow
2775   if (N0 == N1)
2776     return CombineTo(N, DAG.getConstant(0, DL, VT),
2777                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2778 
2779   // fold (subc x, 0) -> x + no borrow
2780   if (isNullConstant(N1))
2781     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2782 
2783   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2784   if (isAllOnesConstant(N0))
2785     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2786                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2787 
2788   return SDValue();
2789 }
2790 
2791 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2792   SDValue N0 = N->getOperand(0);
2793   SDValue N1 = N->getOperand(1);
2794   EVT VT = N0.getValueType();
2795   if (VT.isVector())
2796     return SDValue();
2797 
2798   EVT CarryVT = N->getValueType(1);
2799   SDLoc DL(N);
2800 
2801   // If the flag result is dead, turn this into an SUB.
2802   if (!N->hasAnyUseOfValue(1))
2803     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2804                      DAG.getUNDEF(CarryVT));
2805 
2806   // fold (usubo x, x) -> 0 + no borrow
2807   if (N0 == N1)
2808     return CombineTo(N, DAG.getConstant(0, DL, VT),
2809                      DAG.getConstant(0, DL, CarryVT));
2810 
2811   // fold (usubo x, 0) -> x + no borrow
2812   if (isNullConstant(N1))
2813     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2814 
2815   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2816   if (isAllOnesConstant(N0))
2817     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2818                      DAG.getConstant(0, DL, CarryVT));
2819 
2820   return SDValue();
2821 }
2822 
2823 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2824   SDValue N0 = N->getOperand(0);
2825   SDValue N1 = N->getOperand(1);
2826   SDValue CarryIn = N->getOperand(2);
2827 
2828   // fold (sube x, y, false) -> (subc x, y)
2829   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2830     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2831 
2832   return SDValue();
2833 }
2834 
2835 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2836   SDValue N0 = N->getOperand(0);
2837   SDValue N1 = N->getOperand(1);
2838   SDValue CarryIn = N->getOperand(2);
2839 
2840   // fold (subcarry x, y, false) -> (usubo x, y)
2841   if (isNullConstant(CarryIn)) {
2842     if (!LegalOperations ||
2843         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
2844       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2845   }
2846 
2847   return SDValue();
2848 }
2849 
/// Combine a MUL node: constant folding, identities (x*0, x*1, x*-1),
/// strength reduction of power-of-two multiplies into shifts, shift/mul
/// reassociation, and distribution over (add x, c1).
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // For vectors, "constant" below means a constant splat; the splatted
    // scalar value is captured in ConstValue0/ConstValue1.
    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  // Only performed when the shifted constant C3 still folds to a constant.
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    return RMUL;

  return SDValue();
}
2986 
2987 /// Return true if divmod libcall is available.
2988 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2989                                      const TargetLowering &TLI) {
2990   RTLIB::Libcall LC;
2991   EVT NodeType = Node->getValueType(0);
2992   if (!NodeType.isSimple())
2993     return false;
2994   switch (NodeType.getSimpleVT().SimpleTy) {
2995   default: return false; // No libcall for vector types.
2996   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2997   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2998   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2999   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3000   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3001   }
3002 
3003   return TLI.getLibcallName(LC) != nullptr;
3004 }
3005 
/// Issue divrem if both quotient and remainder are needed.
///
/// Given a div or rem node, scan for sibling nodes computing a div/rem (or
/// an existing divrem) of the same operand pair, and rewrite the whole group
/// to share a single [SU]DIVREM node. Returns the quotient value of the
/// combined node, or a null SDValue if no combine was performed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Walk every user of the dividend; any div/rem/divrem over the same
  // (Op0, Op1) pair can share one DIVREM node.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Skip ourselves, nodes already deleted during this walk, and dead users.
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        // Lazily create the shared DIVREM on the first combinable match
        // (or reuse an existing DIVREM user directly).
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Redirect div users to result 0 (quotient) and rem users to result 1
      // (remainder) of the shared node.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
3076 
3077 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3078   SDValue N0 = N->getOperand(0);
3079   SDValue N1 = N->getOperand(1);
3080   EVT VT = N->getValueType(0);
3081   SDLoc DL(N);
3082 
3083   // X / undef -> undef
3084   // X % undef -> undef
3085   // X / 0 -> undef
3086   // X % 0 -> undef
3087   // NOTE: This includes vectors where any divisor element is zero/undef.
3088   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
3089     return DAG.getUNDEF(VT);
3090 
3091   // undef / X -> 0
3092   // undef % X -> 0
3093   if (N0.isUndef())
3094     return DAG.getConstant(0, DL, VT);
3095 
3096   return SDValue();
3097 }
3098 
3099 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3100   SDValue N0 = N->getOperand(0);
3101   SDValue N1 = N->getOperand(1);
3102   EVT VT = N->getValueType(0);
3103   EVT CCVT = getSetCCResultType(VT);
3104 
3105   // fold vector ops
3106   if (VT.isVector())
3107     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3108       return FoldedVOp;
3109 
3110   SDLoc DL(N);
3111 
3112   // fold (sdiv c1, c2) -> c1/c2
3113   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3114   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3115   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3116     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3117   // fold (sdiv X, 1) -> X
3118   if (N1C && N1C->isOne())
3119     return N0;
3120   // fold (sdiv X, -1) -> 0-X
3121   if (N1C && N1C->isAllOnesValue())
3122     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3123   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3124   if (N1C && N1C->getAPIntValue().isMinSignedValue())
3125     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3126                          DAG.getConstant(1, DL, VT),
3127                          DAG.getConstant(0, DL, VT));
3128 
3129   if (SDValue V = simplifyDivRem(N, DAG))
3130     return V;
3131 
3132   if (SDValue NewSel = foldBinOpIntoSelect(N))
3133     return NewSel;
3134 
3135   // If we know the sign bits of both operands are zero, strength reduce to a
3136   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
3137   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3138     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3139 
3140   if (SDValue V = visitSDIVLike(N0, N1, N))
3141     return V;
3142 
3143   // sdiv, srem -> sdivrem
3144   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3145   // true.  Otherwise, we break the simplification logic in visitREM().
3146   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3147   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3148     if (SDValue DivRem = useDivRem(N))
3149         return DivRem;
3150 
3151   return SDValue();
3152 }
3153 
/// Try to lower (sdiv N0, N1) to cheaper operations. Used both by visitSDIV
/// and by the X%C -> X-X/C*C rewrite in visitREM; the guard there relies on
/// this routine not producing a DIVREM node.
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements. Negated powers of two are accepted too, since
  // the sequence below handles them via the final negate-select.
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isPowerOf2())
      return true;
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    // C1 = the shift amount: CTTZ of a (possibly negated, two's complement)
    // power of two yields log2 of its magnitude.
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    // The CTTZ/SUB above should have constant-folded; bail out if not.
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // Special case: (sdiv X, 1) -> X
    // Special Case: (sdiv X, -1) -> 0-X
    // (Needed because a vector divisor may mix these with real powers of two.)
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);

    // If dividing by a positive value, we're done. Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  return SDValue();
}
3234 
3235 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3236   SDValue N0 = N->getOperand(0);
3237   SDValue N1 = N->getOperand(1);
3238   EVT VT = N->getValueType(0);
3239   EVT CCVT = getSetCCResultType(VT);
3240 
3241   // fold vector ops
3242   if (VT.isVector())
3243     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3244       return FoldedVOp;
3245 
3246   SDLoc DL(N);
3247 
3248   // fold (udiv c1, c2) -> c1/c2
3249   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3250   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3251   if (N0C && N1C)
3252     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3253                                                     N0C, N1C))
3254       return Folded;
3255   // fold (udiv X, 1) -> X
3256   if (N1C && N1C->isOne())
3257     return N0;
3258   // fold (udiv X, -1) -> select(X == -1, 1, 0)
3259   if (N1C && N1C->getAPIntValue().isAllOnesValue())
3260     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3261                          DAG.getConstant(1, DL, VT),
3262                          DAG.getConstant(0, DL, VT));
3263 
3264   if (SDValue V = simplifyDivRem(N, DAG))
3265     return V;
3266 
3267   if (SDValue NewSel = foldBinOpIntoSelect(N))
3268     return NewSel;
3269 
3270   if (SDValue V = visitUDIVLike(N0, N1, N))
3271     return V;
3272 
3273   // sdiv, srem -> sdivrem
3274   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3275   // true.  Otherwise, we break the simplification logic in visitREM().
3276   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3277   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3278     if (SDValue DivRem = useDivRem(N))
3279         return DivRem;
3280 
3281   return SDValue();
3282 }
3283 
3284 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3285   SDLoc DL(N);
3286   EVT VT = N->getValueType(0);
3287 
3288   // fold (udiv x, (1 << c)) -> x >>u c
3289   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3290       DAG.isKnownToBeAPowerOfTwo(N1)) {
3291     SDValue LogBase2 = BuildLogBase2(N1, DL);
3292     AddToWorklist(LogBase2.getNode());
3293 
3294     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3295     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3296     AddToWorklist(Trunc.getNode());
3297     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3298   }
3299 
3300   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3301   if (N1.getOpcode() == ISD::SHL) {
3302     SDValue N10 = N1.getOperand(0);
3303     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3304         DAG.isKnownToBeAPowerOfTwo(N10)) {
3305       SDValue LogBase2 = BuildLogBase2(N10, DL);
3306       AddToWorklist(LogBase2.getNode());
3307 
3308       EVT ADDVT = N1.getOperand(1).getValueType();
3309       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3310       AddToWorklist(Trunc.getNode());
3311       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3312       AddToWorklist(Add.getNode());
3313       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3314     }
3315   }
3316 
3317   // fold (udiv x, c) -> alternate
3318   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3319   if (isConstantOrConstantVector(N1) &&
3320       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3321     if (SDValue Op = BuildUDIV(N))
3322       return Op;
3323 
3324   return SDValue();
3325 }
3326 
/// Combine an SREM or UREM node (N->getOpcode() selects which).
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;
  // fold (urem X, -1) -> select(X == -1, 0, x)
  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    // Unsigned power-of-two divisors reduce to a mask.
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
  // speculative DIV must not cause a DIVREM conversion.  We guard against this
  // by skipping the simplification if isIntDivCheap().  When div is not cheap,
  // combine will not return a DIVREM.  Regardless, checking cheapness here
  // makes sense since the simplification results in fatter code.
  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
    SDValue OptimizedDiv =
        isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
    // Double-check that the speculative division really avoided a DIVREM
    // before building X - (X/C)*C on top of it.
    if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM &&
        OptimizedDiv.getOpcode() != ISD::SDIVREM) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(OptimizedDiv.getNode());
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
3405 
3406 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3407   SDValue N0 = N->getOperand(0);
3408   SDValue N1 = N->getOperand(1);
3409   EVT VT = N->getValueType(0);
3410   SDLoc DL(N);
3411 
3412   if (VT.isVector()) {
3413     // fold (mulhs x, 0) -> 0
3414     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3415       return N1;
3416     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3417       return N0;
3418   }
3419 
3420   // fold (mulhs x, 0) -> 0
3421   if (isNullConstant(N1))
3422     return N1;
3423   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3424   if (isOneConstant(N1))
3425     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3426                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3427                                        getShiftAmountTy(N0.getValueType())));
3428 
3429   // fold (mulhs x, undef) -> 0
3430   if (N0.isUndef() || N1.isUndef())
3431     return DAG.getConstant(0, DL, VT);
3432 
3433   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3434   // plus a shift.
3435   if (VT.isSimple() && !VT.isVector()) {
3436     MVT Simple = VT.getSimpleVT();
3437     unsigned SimpleSize = Simple.getSizeInBits();
3438     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3439     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3440       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3441       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3442       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3443       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3444             DAG.getConstant(SimpleSize, DL,
3445                             getShiftAmountTy(N1.getValueType())));
3446       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3447     }
3448   }
3449 
3450   return SDValue();
3451 }
3452 
3453 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3454   SDValue N0 = N->getOperand(0);
3455   SDValue N1 = N->getOperand(1);
3456   EVT VT = N->getValueType(0);
3457   SDLoc DL(N);
3458 
3459   if (VT.isVector()) {
3460     // fold (mulhu x, 0) -> 0
3461     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3462       return N1;
3463     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3464       return N0;
3465   }
3466 
3467   // fold (mulhu x, 0) -> 0
3468   if (isNullConstant(N1))
3469     return N1;
3470   // fold (mulhu x, 1) -> 0
3471   if (isOneConstant(N1))
3472     return DAG.getConstant(0, DL, N0.getValueType());
3473   // fold (mulhu x, undef) -> 0
3474   if (N0.isUndef() || N1.isUndef())
3475     return DAG.getConstant(0, DL, VT);
3476 
3477   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
3478   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3479       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3480     SDLoc DL(N);
3481     unsigned NumEltBits = VT.getScalarSizeInBits();
3482     SDValue LogBase2 = BuildLogBase2(N1, DL);
3483     SDValue SRLAmt = DAG.getNode(
3484         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3485     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3486     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3487     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3488   }
3489 
3490   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3491   // plus a shift.
3492   if (VT.isSimple() && !VT.isVector()) {
3493     MVT Simple = VT.getSimpleVT();
3494     unsigned SimpleSize = Simple.getSizeInBits();
3495     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3496     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3497       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3498       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3499       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3500       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3501             DAG.getConstant(SimpleSize, DL,
3502                             getShiftAmountTy(N1.getValueType())));
3503       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3504     }
3505   }
3506 
3507   return SDValue();
3508 }
3509 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// the combined value if a simplification was made, otherwise a null SDValue.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    // Map both results onto the single-result LoOp node; the unused high
    // half doesn't matter, as it has no uses.
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    // Speculatively build the single-result node and try to combine it; only
    // commit if the combine produced a different (and legal) node.
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    // Same speculative-combine approach for the high half.
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
3560 
3561 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3562   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3563     return Res;
3564 
3565   EVT VT = N->getValueType(0);
3566   SDLoc DL(N);
3567 
3568   // If the type is twice as wide is legal, transform the mulhu to a wider
3569   // multiply plus a shift.
3570   if (VT.isSimple() && !VT.isVector()) {
3571     MVT Simple = VT.getSimpleVT();
3572     unsigned SimpleSize = Simple.getSizeInBits();
3573     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3574     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3575       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3576       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3577       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3578       // Compute the high part as N1.
3579       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3580             DAG.getConstant(SimpleSize, DL,
3581                             getShiftAmountTy(Lo.getValueType())));
3582       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3583       // Compute the low part as N0.
3584       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3585       return CombineTo(N, Lo, Hi);
3586     }
3587   }
3588 
3589   return SDValue();
3590 }
3591 
3592 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3593   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3594     return Res;
3595 
3596   EVT VT = N->getValueType(0);
3597   SDLoc DL(N);
3598 
3599   // If the type is twice as wide is legal, transform the mulhu to a wider
3600   // multiply plus a shift.
3601   if (VT.isSimple() && !VT.isVector()) {
3602     MVT Simple = VT.getSimpleVT();
3603     unsigned SimpleSize = Simple.getSizeInBits();
3604     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3605     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3606       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3607       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3608       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3609       // Compute the high part as N1.
3610       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3611             DAG.getConstant(SimpleSize, DL,
3612                             getShiftAmountTy(Lo.getValueType())));
3613       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3614       // Compute the low part as N0.
3615       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3616       return CombineTo(N, Lo, Hi);
3617     }
3618   }
3619 
3620   return SDValue();
3621 }
3622 
3623 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3624   // (smulo x, 2) -> (saddo x, x)
3625   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3626     if (C2->getAPIntValue() == 2)
3627       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3628                          N->getOperand(0), N->getOperand(0));
3629 
3630   return SDValue();
3631 }
3632 
3633 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3634   // (umulo x, 2) -> (uaddo x, x)
3635   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3636     if (C2->getAPIntValue() == 2)
3637       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3638                          N->getOperand(0), N->getOperand(0));
3639 
3640   return SDValue();
3641 }
3642 
3643 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3644   SDValue N0 = N->getOperand(0);
3645   SDValue N1 = N->getOperand(1);
3646   EVT VT = N0.getValueType();
3647 
3648   // fold vector ops
3649   if (VT.isVector())
3650     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3651       return FoldedVOp;
3652 
3653   // fold operation with constant operands.
3654   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3655   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3656   if (N0C && N1C)
3657     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3658 
3659   // canonicalize constant to RHS
3660   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3661      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3662     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3663 
3664   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3665   // Only do this if the current op isn't legal and the flipped is.
3666   unsigned Opcode = N->getOpcode();
3667   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3668   if (!TLI.isOperationLegal(Opcode, VT) &&
3669       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3670       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3671     unsigned AltOpcode;
3672     switch (Opcode) {
3673     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3674     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3675     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3676     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3677     default: llvm_unreachable("Unknown MINMAX opcode");
3678     }
3679     if (TLI.isOperationLegal(AltOpcode, VT))
3680       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3681   }
3682 
3683   return SDValue();
3684 }
3685 
3686 /// If this is a binary operator with two operands of the same opcode, try to
3687 /// simplify it.
3688 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3689   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3690   EVT VT = N0.getValueType();
3691   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3692 
3693   // Bail early if none of these transforms apply.
3694   if (N0.getNumOperands() == 0) return SDValue();
3695 
3696   // For each of OP in AND/OR/XOR:
3697   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3698   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3699   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3700   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3701   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3702   //
3703   // do not sink logical op inside of a vector extend, since it may combine
3704   // into a vsetcc.
3705   EVT Op0VT = N0.getOperand(0).getValueType();
3706   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3707        N0.getOpcode() == ISD::SIGN_EXTEND ||
3708        N0.getOpcode() == ISD::BSWAP ||
3709        // Avoid infinite looping with PromoteIntBinOp.
3710        (N0.getOpcode() == ISD::ANY_EXTEND &&
3711         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3712        (N0.getOpcode() == ISD::TRUNCATE &&
3713         (!TLI.isZExtFree(VT, Op0VT) ||
3714          !TLI.isTruncateFree(Op0VT, VT)) &&
3715         TLI.isTypeLegal(Op0VT))) &&
3716       !VT.isVector() &&
3717       Op0VT == N1.getOperand(0).getValueType() &&
3718       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3719     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3720                                  N0.getOperand(0).getValueType(),
3721                                  N0.getOperand(0), N1.getOperand(0));
3722     AddToWorklist(ORNode.getNode());
3723     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3724   }
3725 
3726   // For each of OP in SHL/SRL/SRA/AND...
3727   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3728   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
3729   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3730   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3731        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3732       N0.getOperand(1) == N1.getOperand(1)) {
3733     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3734                                  N0.getOperand(0).getValueType(),
3735                                  N0.getOperand(0), N1.getOperand(0));
3736     AddToWorklist(ORNode.getNode());
3737     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3738                        ORNode, N0.getOperand(1));
3739   }
3740 
3741   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3742   // Only perform this optimization up until type legalization, before
3743   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3744   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3745   // we don't want to undo this promotion.
3746   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3747   // on scalars.
3748   if ((N0.getOpcode() == ISD::BITCAST ||
3749        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3750        Level <= AfterLegalizeTypes) {
3751     SDValue In0 = N0.getOperand(0);
3752     SDValue In1 = N1.getOperand(0);
3753     EVT In0Ty = In0.getValueType();
3754     EVT In1Ty = In1.getValueType();
3755     SDLoc DL(N);
3756     // If both incoming values are integers, and the original types are the
3757     // same.
3758     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3759       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3760       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3761       AddToWorklist(Op.getNode());
3762       return BC;
3763     }
3764   }
3765 
3766   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3767   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3768   // If both shuffles use the same mask, and both shuffle within a single
3769   // vector, then it is worthwhile to move the swizzle after the operation.
3770   // The type-legalizer generates this pattern when loading illegal
3771   // vector types from memory. In many cases this allows additional shuffle
3772   // optimizations.
3773   // There are other cases where moving the shuffle after the xor/and/or
3774   // is profitable even if shuffles don't perform a swizzle.
3775   // If both shuffles use the same mask, and both shuffles have the same first
3776   // or second operand, then it might still be profitable to move the shuffle
3777   // after the xor/and/or operation.
3778   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3779     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3780     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3781 
3782     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3783            "Inputs to shuffles are not the same type");
3784 
3785     // Check that both shuffles use the same mask. The masks are known to be of
3786     // the same length because the result vector type is the same.
3787     // Check also that shuffles have only one use to avoid introducing extra
3788     // instructions.
3789     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3790         SVN0->getMask().equals(SVN1->getMask())) {
3791       SDValue ShOp = N0->getOperand(1);
3792 
3793       // Don't try to fold this node if it requires introducing a
3794       // build vector of all zeros that might be illegal at this stage.
3795       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3796         if (!LegalTypes)
3797           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3798         else
3799           ShOp = SDValue();
3800       }
3801 
3802       // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
3803       // (OR  (shuf (A, C), shuf (B, C))) -> shuf (OR  (A, B), C)
3804       // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
3805       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3806         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3807                                       N0->getOperand(0), N1->getOperand(0));
3808         AddToWorklist(NewNode.getNode());
3809         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3810                                     SVN0->getMask());
3811       }
3812 
3813       // Don't try to fold this node if it requires introducing a
3814       // build vector of all zeros that might be illegal at this stage.
3815       ShOp = N0->getOperand(0);
3816       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3817         if (!LegalTypes)
3818           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3819         else
3820           ShOp = SDValue();
3821       }
3822 
3823       // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
3824       // (OR  (shuf (C, A), shuf (C, B))) -> shuf (C, OR  (A, B))
3825       // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
3826       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3827         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3828                                       N0->getOperand(1), N1->getOperand(1));
3829         AddToWorklist(NewNode.getNode());
3830         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3831                                     SVN0->getMask());
3832       }
3833     }
3834   }
3835 
3836   return SDValue();
3837 }
3838 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
///
/// \param IsAnd true when the joining logic op is AND, false for OR.
/// \param N0, N1 the two setcc (or setcc-equivalent) values being combined.
/// \param DL location to use for newly created nodes.
/// \returns the replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  // Decompose each operand into (LHS, RHS, cond-code); bail out if either is
  // not a setcc or an equivalent select_cc.
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares test against the same RHS with the same predicate: try the
  // family of folds that merge the LHS values with a single bitwise op.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullConstantOrNullSplatConstant(LR);
    bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // The >1-bit width check keeps the constants 1 and 2 representable in OpVT.
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    // Only use the merged condition code if it exists and, post-legalization,
    // both it and a plain SETCC are legal for this type.
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
3958 
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
///
/// \param N0, N1 the two values being ANDed.
/// \param N the node being combined (used for worklist bookkeeping).
/// \returns the simplified value, or an empty SDValue if nothing applied.
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N1.getValueType();
  SDLoc DL(N);

  // fold (and x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // Try the (and setcc, setcc) folds.
  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
    return V;

  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
      VT.getSizeInBits() <= 64) {
    if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
        // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
        // immediate for an add, but it is legal if its top c2 bits are set,
        // transform the ADD so the immediate doesn't need to be materialized
        // in a register.
        APInt ADDC = ADDI->getAPIntValue();
        APInt SRLC = SRLI->getAPIntValue();
        if (ADDC.getMinSignedBits() <= 64 &&
            SRLC.ult(VT.getSizeInBits()) &&
            !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
          // The top c2 bits of the AND result are known zero (cleared by the
          // SRL operand), so setting them in the add constant is harmless.
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                             SRLC.getZExtValue());
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDLoc DL0(N0);
              SDValue NewAdd =
                DAG.getNode(ISD::ADD, DL0, VT,
                            N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
              CombineTo(N0.getNode(), NewAdd);
              // Return N so it doesn't get rechecked!
              return SDValue(N, 0);
            }
          }
        }
      }
    }
  }

  // Reduce bit extract of low half of an integer to the narrower type.
  // (and (srl i64:x, K), KMask) ->
  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
      if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        unsigned Size = VT.getSizeInBits();
        const APInt &AndMask = CAnd->getAPIntValue();
        unsigned ShiftBits = CShift->getZExtValue();

        // Bail out, this node will probably disappear anyway.
        if (ShiftBits == 0)
          return SDValue();

        unsigned MaskBits = AndMask.countTrailingOnes();
        EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);

        if (AndMask.isMask() &&
            // Required bits must not span the two halves of the integer and
            // must fit in the half size type.
            (ShiftBits + MaskBits <= Size / 2) &&
            TLI.isNarrowingProfitable(VT, HalfVT) &&
            TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
            TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
            TLI.isTruncateFree(VT, HalfVT) &&
            TLI.isZExtFree(HalfVT, VT)) {
          // The isNarrowingProfitable is to avoid regressions on PPC and
          // AArch64 which match a few 64-bit bit insert / bit extract patterns
          // on downstream users of this. Those patterns could probably be
          // extended to handle extensions mixed in.

          // NOTE(review): SL is declared as an SDValue but is passed below
          // where an SDLoc is expected — this relies on SDLoc's implicit
          // conversion from SDValue to pick up N0's debug location.
          SDValue SL(N0);
          assert(MaskBits <= Size);

          // Extracting the highest bit of the low half.
          EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
                                      N0.getOperand(0));

          SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
          SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
          SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
          SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
          return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
        }
      }
    }
  }

  return SDValue();
}
4057 
4058 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4059                                    EVT LoadResultTy, EVT &ExtVT) {
4060   if (!AndC->getAPIntValue().isMask())
4061     return false;
4062 
4063   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4064 
4065   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4066   EVT LoadedVT = LoadN->getMemoryVT();
4067 
4068   if (ExtVT == LoadedVT &&
4069       (!LegalOperations ||
4070        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4071     // ZEXTLOAD will match without needing to change the size of the value being
4072     // loaded.
4073     return true;
4074   }
4075 
4076   // Do not change the width of a volatile load.
4077   if (LoadN->isVolatile())
4078     return false;
4079 
4080   // Do not generate loads of non-round integer types since these can
4081   // be expensive (and would be wrong if the type is not byte sized).
4082   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4083     return false;
4084 
4085   if (LegalOperations &&
4086       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4087     return false;
4088 
4089   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4090     return false;
4091 
4092   return true;
4093 }
4094 
4095 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4096                                     ISD::LoadExtType ExtType, EVT &MemVT,
4097                                     unsigned ShAmt) {
4098   if (!LDST)
4099     return false;
4100   // Only allow byte offsets.
4101   if (ShAmt % 8)
4102     return false;
4103 
4104   // Do not generate loads of non-round integer types since these can
4105   // be expensive (and would be wrong if the type is not byte sized).
4106   if (!MemVT.isRound())
4107     return false;
4108 
4109   // Don't change the width of a volatile load.
4110   if (LDST->isVolatile())
4111     return false;
4112 
4113   // Verify that we are actually reducing a load width here.
4114   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4115     return false;
4116 
4117   // Ensure that this isn't going to produce an unsupported unaligned access.
4118   if (ShAmt &&
4119       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4120                               LDST->getAddressSpace(), ShAmt / 8))
4121     return false;
4122 
4123   // It's not possible to generate a constant of extended or untyped type.
4124   EVT PtrType = LDST->getBasePtr().getValueType();
4125   if (PtrType == MVT::Untyped || PtrType.isExtended())
4126     return false;
4127 
4128   if (isa<LoadSDNode>(LDST)) {
4129     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4130     // Don't transform one with multiple uses, this would require adding a new
4131     // load.
4132     if (!SDValue(Load, 0).hasOneUse())
4133       return false;
4134 
4135     if (LegalOperations &&
4136         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4137       return false;
4138 
4139     // For the transform to be legal, the load must produce only two values
4140     // (the value loaded and the chain).  Don't transform a pre-increment
4141     // load, for example, which produces an extra value.  Otherwise the
4142     // transformation is not equivalent, and the downstream logic to replace
4143     // uses gets things wrong.
4144     if (Load->getNumValues() > 2)
4145       return false;
4146 
4147     // If the load that we're shrinking is an extload and we're not just
4148     // discarding the extension we can't simply shrink the load. Bail.
4149     // TODO: It would be possible to merge the extensions in some cases.
4150     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4151         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4152       return false;
4153 
4154     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4155       return false;
4156   } else {
4157     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4158     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4159     // Can't write outside the original store
4160     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4161       return false;
4162 
4163     if (LegalOperations &&
4164         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4165       return false;
4166   }
4167   return true;
4168 }
4169 
/// Walk the operand tree rooted at \p N (initially the AND itself, then the
/// bitwise-logic nodes feeding it), collecting loads that can be narrowed to
/// the width of \p Mask.
///
/// \param Loads out-param: loads whose width can be reduced.
/// \param NodesWithConsts out-param: OR/XOR nodes carrying a constant with
///        bits outside the mask; those constants need masking later.
/// \param Mask the low-bit mask being propagated backwards.
/// \param NodeToMask in/out: at most one non-load, non-constant node that
///        must itself be AND-masked for the transform to be sound.
/// \returns true if the whole subtree is compatible with mask propagation.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallPtrSetImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
    SDValue Op = N->getOperand(i);

    // Vector operands are not handled by this transform.
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      // For OR/XOR, a constant with bits outside the mask would change the
      // masked result; remember the node so the constant gets masked.
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    // Interior nodes with other users cannot be rewritten safely.
    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.insert(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      // Otherwise fall through to the single-node-to-mask handling below.
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      // Recurse through bitwise logic nodes.
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will be masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        // Glue/chain results don't count as data results.
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}
4260 
/// Try to eliminate an AND with a constant low-bit mask by propagating the
/// mask back through the expression tree and narrowing the loads that feed
/// it (via SearchForAndLoads / ReduceLoadWidth).
/// \returns true if the DAG was changed; in that case all uses of \p N have
/// been redirected to its first operand, making the AND dead.
bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
  // The RHS must be a constant contiguous low-bit mask (0...01...1).
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  SmallPtrSet<LoadSDNode*, 8> Loads;
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    // Without at least one narrowable load the rewrite is pointless.
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // The RAUW above also rewired the new AND's own input to point at the
      // AND itself; restore its operand to the fixup node. The opcode guard
      // covers the case where the getNode call folded/CSE'd to something
      // other than a fresh AND — TODO confirm this is the intent.
      if (And.getOpcode() == ISD ::AND)
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      // Canonicalize so the constant ends up in Op1.
      if (isa<ConstantSDNode>(Op0))
          std::swap(Op0, Op1);

      // Mask the constant operand down to the propagated mask width.
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // As above, undo the self-reference the RAUW introduced in the new AND.
      if (And.getOpcode() == ISD ::AND)
        And = SDValue(
            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
      // Shrink the load through the freshly created (load & mask) pattern.
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    // The AND is now redundant; forward all its uses to its input.
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}
4328 
4329 // Unfold
4330 //    x &  (-1 'logical shift' y)
4331 // To
4332 //    (x 'opposite logical shift' y) 'logical shift' y
4333 // if it is better for performance.
4334 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4335   assert(N->getOpcode() == ISD::AND);
4336 
4337   SDValue N0 = N->getOperand(0);
4338   SDValue N1 = N->getOperand(1);
4339 
4340   // Do we actually prefer shifts over mask?
4341   if (!TLI.preferShiftsToClearExtremeBits(N0))
4342     return SDValue();
4343 
4344   // Try to match  (-1 '[outer] logical shift' y)
4345   unsigned OuterShift;
4346   unsigned InnerShift; // The opposite direction to the OuterShift.
4347   SDValue Y;           // Shift amount.
4348   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4349     if (!M.hasOneUse())
4350       return false;
4351     OuterShift = M->getOpcode();
4352     if (OuterShift == ISD::SHL)
4353       InnerShift = ISD::SRL;
4354     else if (OuterShift == ISD::SRL)
4355       InnerShift = ISD::SHL;
4356     else
4357       return false;
4358     if (!isAllOnesConstant(M->getOperand(0)))
4359       return false;
4360     Y = M->getOperand(1);
4361     return true;
4362   };
4363 
4364   SDValue X;
4365   if (matchMask(N1))
4366     X = N0;
4367   else if (matchMask(N0))
4368     X = N1;
4369   else
4370     return SDValue();
4371 
4372   SDLoc DL(N);
4373   EVT VT = N->getValueType(0);
4374 
4375   //     tmp = x   'opposite logical shift' y
4376   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4377   //     ret = tmp 'logical shift' y
4378   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4379 
4380   return T1;
4381 }
4382 
4383 SDValue DAGCombiner::visitAND(SDNode *N) {
4384   SDValue N0 = N->getOperand(0);
4385   SDValue N1 = N->getOperand(1);
4386   EVT VT = N1.getValueType();
4387 
4388   // x & x --> x
4389   if (N0 == N1)
4390     return N0;
4391 
4392   // fold vector ops
4393   if (VT.isVector()) {
4394     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4395       return FoldedVOp;
4396 
4397     // fold (and x, 0) -> 0, vector edition
4398     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4399       // do not return N0, because undef node may exist in N0
4400       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4401                              SDLoc(N), N0.getValueType());
4402     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4403       // do not return N1, because undef node may exist in N1
4404       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4405                              SDLoc(N), N1.getValueType());
4406 
4407     // fold (and x, -1) -> x, vector edition
4408     if (ISD::isBuildVectorAllOnes(N0.getNode()))
4409       return N1;
4410     if (ISD::isBuildVectorAllOnes(N1.getNode()))
4411       return N0;
4412   }
4413 
4414   // fold (and c1, c2) -> c1&c2
4415   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4416   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4417   if (N0C && N1C && !N1C->isOpaque())
4418     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4419   // canonicalize constant to RHS
4420   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4421      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4422     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4423   // fold (and x, -1) -> x
4424   if (isAllOnesConstant(N1))
4425     return N0;
4426   // if (and x, c) is known to be zero, return 0
4427   unsigned BitWidth = VT.getScalarSizeInBits();
4428   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4429                                    APInt::getAllOnesValue(BitWidth)))
4430     return DAG.getConstant(0, SDLoc(N), VT);
4431 
4432   if (SDValue NewSel = foldBinOpIntoSelect(N))
4433     return NewSel;
4434 
4435   // reassociate and
4436   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4437     return RAND;
4438 
4439   // Try to convert a constant mask AND into a shuffle clear mask.
4440   if (VT.isVector())
4441     if (SDValue Shuffle = XformToShuffleWithZero(N))
4442       return Shuffle;
4443 
4444   // fold (and (or x, C), D) -> D if (C & D) == D
4445   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4446     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4447   };
4448   if (N0.getOpcode() == ISD::OR &&
4449       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4450     return N1;
4451   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4452   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4453     SDValue N0Op0 = N0.getOperand(0);
4454     APInt Mask = ~N1C->getAPIntValue();
4455     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4456     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4457       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4458                                  N0.getValueType(), N0Op0);
4459 
4460       // Replace uses of the AND with uses of the Zero extend node.
4461       CombineTo(N, Zext);
4462 
4463       // We actually want to replace all uses of the any_extend with the
4464       // zero_extend, to avoid duplicating things.  This will later cause this
4465       // AND to be folded.
4466       CombineTo(N0.getNode(), Zext);
4467       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4468     }
4469   }
4470   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4471   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4472   // already be zero by virtue of the width of the base type of the load.
4473   //
4474   // the 'X' node here can either be nothing or an extract_vector_elt to catch
4475   // more cases.
4476   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4477        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4478        N0.getOperand(0).getOpcode() == ISD::LOAD &&
4479        N0.getOperand(0).getResNo() == 0) ||
4480       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4481     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4482                                          N0 : N0.getOperand(0) );
4483 
4484     // Get the constant (if applicable) the zero'th operand is being ANDed with.
4485     // This can be a pure constant or a vector splat, in which case we treat the
4486     // vector as a scalar and use the splat value.
4487     APInt Constant = APInt::getNullValue(1);
4488     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4489       Constant = C->getAPIntValue();
4490     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4491       APInt SplatValue, SplatUndef;
4492       unsigned SplatBitSize;
4493       bool HasAnyUndefs;
4494       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4495                                              SplatBitSize, HasAnyUndefs);
4496       if (IsSplat) {
4497         // Undef bits can contribute to a possible optimisation if set, so
4498         // set them.
4499         SplatValue |= SplatUndef;
4500 
4501         // The splat value may be something like "0x00FFFFFF", which means 0 for
4502         // the first vector value and FF for the rest, repeating. We need a mask
4503         // that will apply equally to all members of the vector, so AND all the
4504         // lanes of the constant together.
4505         EVT VT = Vector->getValueType(0);
4506         unsigned BitWidth = VT.getScalarSizeInBits();
4507 
4508         // If the splat value has been compressed to a bitlength lower
4509         // than the size of the vector lane, we need to re-expand it to
4510         // the lane size.
4511         if (BitWidth > SplatBitSize)
4512           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4513                SplatBitSize < BitWidth;
4514                SplatBitSize = SplatBitSize * 2)
4515             SplatValue |= SplatValue.shl(SplatBitSize);
4516 
4517         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4518         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4519         if (SplatBitSize % BitWidth == 0) {
4520           Constant = APInt::getAllOnesValue(BitWidth);
4521           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4522             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4523         }
4524       }
4525     }
4526 
4527     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4528     // actually legal and isn't going to get expanded, else this is a false
4529     // optimisation.
4530     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4531                                                     Load->getValueType(0),
4532                                                     Load->getMemoryVT());
4533 
4534     // Resize the constant to the same size as the original memory access before
4535     // extension. If it is still the AllOnesValue then this AND is completely
4536     // unneeded.
4537     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4538 
4539     bool B;
4540     switch (Load->getExtensionType()) {
4541     default: B = false; break;
4542     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4543     case ISD::ZEXTLOAD:
4544     case ISD::NON_EXTLOAD: B = true; break;
4545     }
4546 
4547     if (B && Constant.isAllOnesValue()) {
4548       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4549       // preserve semantics once we get rid of the AND.
4550       SDValue NewLoad(Load, 0);
4551 
4552       // Fold the AND away. NewLoad may get replaced immediately.
4553       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4554 
4555       if (Load->getExtensionType() == ISD::EXTLOAD) {
4556         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4557                               Load->getValueType(0), SDLoc(Load),
4558                               Load->getChain(), Load->getBasePtr(),
4559                               Load->getOffset(), Load->getMemoryVT(),
4560                               Load->getMemOperand());
4561         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4562         if (Load->getNumValues() == 3) {
4563           // PRE/POST_INC loads have 3 values.
4564           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4565                            NewLoad.getValue(2) };
4566           CombineTo(Load, To, 3, true);
4567         } else {
4568           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4569         }
4570       }
4571 
4572       return SDValue(N, 0); // Return N so it doesn't get rechecked!
4573     }
4574   }
4575 
4576   // fold (and (load x), 255) -> (zextload x, i8)
4577   // fold (and (extload x, i16), 255) -> (zextload x, i8)
4578   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4579   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4580                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
4581                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4582     if (SDValue Res = ReduceLoadWidth(N)) {
4583       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4584         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4585 
4586       AddToWorklist(N);
4587       CombineTo(LN0, Res, Res.getValue(1));
4588       return SDValue(N, 0);
4589     }
4590   }
4591 
4592   if (Level >= AfterLegalizeTypes) {
4593     // Attempt to propagate the AND back up to the leaves which, if they're
4594     // loads, can be combined to narrow loads and the AND node can be removed.
4595     // Perform after legalization so that extend nodes will already be
4596     // combined into the loads.
4597     if (BackwardsPropagateMask(N, DAG)) {
4598       return SDValue(N, 0);
4599     }
4600   }
4601 
4602   if (SDValue Combined = visitANDLike(N0, N1, N))
4603     return Combined;
4604 
4605   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
4606   if (N0.getOpcode() == N1.getOpcode())
4607     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4608       return Tmp;
4609 
4610   // Masking the negated extension of a boolean is just the zero-extended
4611   // boolean:
4612   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4613   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4614   //
4615   // Note: the SimplifyDemandedBits fold below can make an information-losing
4616   // transform, and then we have no way to find this better fold.
4617   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4618     if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
4619       SDValue SubRHS = N0.getOperand(1);
4620       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4621           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4622         return SubRHS;
4623       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4624           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4625         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4626     }
4627   }
4628 
4629   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4630   // fold (and (sra)) -> (and (srl)) when possible.
4631   if (SimplifyDemandedBits(SDValue(N, 0)))
4632     return SDValue(N, 0);
4633 
4634   // fold (zext_inreg (extload x)) -> (zextload x)
4635   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4636     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4637     EVT MemVT = LN0->getMemoryVT();
4638     // If we zero all the possible extended bits, then we can turn this into
4639     // a zextload if we are running before legalize or the operation is legal.
4640     unsigned BitWidth = N1.getScalarValueSizeInBits();
4641     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4642                            BitWidth - MemVT.getScalarSizeInBits())) &&
4643         ((!LegalOperations && !LN0->isVolatile()) ||
4644          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4645       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4646                                        LN0->getChain(), LN0->getBasePtr(),
4647                                        MemVT, LN0->getMemOperand());
4648       AddToWorklist(N);
4649       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4650       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4651     }
4652   }
4653   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4654   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4655       N0.hasOneUse()) {
4656     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4657     EVT MemVT = LN0->getMemoryVT();
4658     // If we zero all the possible extended bits, then we can turn this into
4659     // a zextload if we are running before legalize or the operation is legal.
4660     unsigned BitWidth = N1.getScalarValueSizeInBits();
4661     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4662                            BitWidth - MemVT.getScalarSizeInBits())) &&
4663         ((!LegalOperations && !LN0->isVolatile()) ||
4664          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4665       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4666                                        LN0->getChain(), LN0->getBasePtr(),
4667                                        MemVT, LN0->getMemOperand());
4668       AddToWorklist(N);
4669       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4670       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4671     }
4672   }
4673   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4674   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4675     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4676                                            N0.getOperand(1), false))
4677       return BSwap;
4678   }
4679 
4680   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
4681     return Shifts;
4682 
4683   return SDValue();
4684 }
4685 
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// \p N0 and \p N1 are the two operands of the OR node \p N. Either side may
/// additionally be wrapped in an AND with a 0xFF/0xFF00 (or 0xFFFF) constant
/// mask; those masks are looked through and validated. If \p DemandHighBits
/// is true, the bits above the low halfword of the result must provably be
/// zero for the transform to be sound.
/// \returns the bswap-based replacement value, or an empty SDValue if the
/// pattern does not match.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // Run only after legalization so the BSWAP legality query below is final.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  // Canonicalize so that after the swaps, an AND-wrapped SHL (if any) is in
  // N0 and an AND-wrapped SRL (if any) is in N1.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    // Look through the mask; remember we did so for the high-bits check below.
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // Canonicalize the shifts themselves: SHL in N0, SRL in N1.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  // Both shift amounts must be the constant 8.
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  // i.e. the masks may also appear *inside* the shifts, but only if we did
  // not already consume an outer mask on that side.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both shifts must operate on the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Emit (bswap a), then shift the swapped halfword down into the low 16 bits
  // for types wider than i16.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
4798 
4799 /// Return true if the specified node is an element that makes up a 32-bit
4800 /// packed halfword byteswap.
4801 /// ((x & 0x000000ff) << 8) |
4802 /// ((x & 0x0000ff00) >> 8) |
4803 /// ((x & 0x00ff0000) << 8) |
4804 /// ((x & 0xff000000) >> 8)
4805 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4806   if (!N.getNode()->hasOneUse())
4807     return false;
4808 
4809   unsigned Opc = N.getOpcode();
4810   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4811     return false;
4812 
4813   SDValue N0 = N.getOperand(0);
4814   unsigned Opc0 = N0.getOpcode();
4815   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4816     return false;
4817 
4818   ConstantSDNode *N1C = nullptr;
4819   // SHL or SRL: look upstream for AND mask operand
4820   if (Opc == ISD::AND)
4821     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4822   else if (Opc0 == ISD::AND)
4823     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4824   if (!N1C)
4825     return false;
4826 
4827   unsigned MaskByteOffset;
4828   switch (N1C->getZExtValue()) {
4829   default:
4830     return false;
4831   case 0xFF:       MaskByteOffset = 0; break;
4832   case 0xFF00:     MaskByteOffset = 1; break;
4833   case 0xFFFF:
4834     // In case demanded bits didn't clear the bits that will be shifted out.
4835     // This is needed for X86.
4836     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
4837       MaskByteOffset = 1;
4838       break;
4839     }
4840     return false;
4841   case 0xFF0000:   MaskByteOffset = 2; break;
4842   case 0xFF000000: MaskByteOffset = 3; break;
4843   }
4844 
4845   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4846   if (Opc == ISD::AND) {
4847     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4848       // (x >> 8) & 0xff
4849       // (x >> 8) & 0xff0000
4850       if (Opc0 != ISD::SRL)
4851         return false;
4852       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4853       if (!C || C->getZExtValue() != 8)
4854         return false;
4855     } else {
4856       // (x << 8) & 0xff00
4857       // (x << 8) & 0xff000000
4858       if (Opc0 != ISD::SHL)
4859         return false;
4860       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4861       if (!C || C->getZExtValue() != 8)
4862         return false;
4863     }
4864   } else if (Opc == ISD::SHL) {
4865     // (x & 0xff) << 8
4866     // (x & 0xff0000) << 8
4867     if (MaskByteOffset != 0 && MaskByteOffset != 2)
4868       return false;
4869     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4870     if (!C || C->getZExtValue() != 8)
4871       return false;
4872   } else { // Opc == ISD::SRL
4873     // (x & 0xff00) >> 8
4874     // (x & 0xff000000) >> 8
4875     if (MaskByteOffset != 1 && MaskByteOffset != 3)
4876       return false;
4877     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4878     if (!C || C->getZExtValue() != 8)
4879       return false;
4880   }
4881 
4882   if (Parts[MaskByteOffset])
4883     return false;
4884 
4885   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4886   return true;
4887 }
4888 
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
///
/// \p N0 and \p N1 are the operands of the OR node \p N. The four masked
/// shifts may be grouped as a balanced tree (or (or A B) (or C D)) or a
/// left-leaning chain (or (or (or A B) C) D); both shapes are handled.
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  // Run only after legalization so the BSWAP/ROTL legality queries are final.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // Parts[i] receives the source node that produces byte i; filled in by
  // isBSwapHWordElement, which also rejects duplicate byte positions.
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordElement(N00, Parts))
      return SDValue();

    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    SDValue N10 = N1.getOperand(0);
    if (!isBSwapHWordElement(N10, Parts))
      return SDValue();
    SDValue N11 = N1.getOperand(1);
    if (!isBSwapHWordElement(N11, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do  (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
4963 
4964 /// This contains all DAGCombine rules which reduce two values combined by
4965 /// an Or operation to a single value \see visitANDLike().
4966 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4967   EVT VT = N1.getValueType();
4968   SDLoc DL(N);
4969 
4970   // fold (or x, undef) -> -1
4971   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4972     return DAG.getAllOnesConstant(DL, VT);
4973 
4974   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4975     return V;
4976 
4977   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4978   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4979       // Don't increase # computations.
4980       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4981     // We can only do this xform if we know that bits from X that are set in C2
4982     // but not in C1 are already zero.  Likewise for Y.
4983     if (const ConstantSDNode *N0O1C =
4984         getAsNonOpaqueConstant(N0.getOperand(1))) {
4985       if (const ConstantSDNode *N1O1C =
4986           getAsNonOpaqueConstant(N1.getOperand(1))) {
4987         // We can only do this xform if we know that bits from X that are set in
4988         // C2 but not in C1 are already zero.  Likewise for Y.
4989         const APInt &LHSMask = N0O1C->getAPIntValue();
4990         const APInt &RHSMask = N1O1C->getAPIntValue();
4991 
4992         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4993             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4994           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4995                                   N0.getOperand(0), N1.getOperand(0));
4996           return DAG.getNode(ISD::AND, DL, VT, X,
4997                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4998         }
4999       }
5000     }
5001   }
5002 
5003   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5004   if (N0.getOpcode() == ISD::AND &&
5005       N1.getOpcode() == ISD::AND &&
5006       N0.getOperand(0) == N1.getOperand(0) &&
5007       // Don't increase # computations.
5008       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5009     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5010                             N0.getOperand(1), N1.getOperand(1));
5011     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5012   }
5013 
5014   return SDValue();
5015 }
5016 
/// Combine an ISD::OR node: constant folds, algebraic identities, vector
/// shuffle merging, and bswap/rotate/load pattern recognition. Returns the
/// replacement value, or an empty SDValue if no combine applied.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input (exactly one of each pair).
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        // Build a combined mask: each lane must be zero in exactly one of the
        // two shuffles so the OR just selects the non-zero element.
        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the other side is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          // If the mask isn't legal as-is, try the commuted form before
          // giving up.
          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0.
  auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(
            ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // OR of loaded bytes may combine into a single wider load.
  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
5182 
5183 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5184   if (Op.getOpcode() == ISD::AND &&
5185       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5186     Mask = Op.getOperand(1);
5187     return Op.getOperand(0);
5188   }
5189   return Op;
5190 }
5191 
5192 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5193 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5194                             SDValue &Mask) {
5195   Op = stripConstantMask(DAG, Op, Mask);
5196   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5197     Shift = Op;
5198     return true;
5199   }
5200   return false;
5201 }
5202 
/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv.  This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
///   (or (mul v c0) (shrl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
///   (or (shl v c0) (shrl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (shrl v c0) (shl (shrl v c1) c2)):
///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  assert(
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
      "Existing shift must be valid as a rotate half");

  // Peel off an optional (and x, mask) wrapper; any mask found is handed back
  // to the caller through \p Mask so it can be reapplied to the rotate result.
  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from. The needed shift is always
  // the opposite direction of the existing shift (OppShift).
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these
  // values.
  // Check that we have constant values (zero constants are rejected too).
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  // Reject an existing shift amount wider than the type (c2 > bitwidth).
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  // c3 = bitwidth - c2 (see the header comment for the naming).
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  // The shift amount constant is built in the existing shift's amount type.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}
5316 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known;
      DAG.computeKnownBits(Neg.getOperand(0), Known);
      unsigned Bits = Log2_64(EltSize);
      // The AND may be dropped if the mask has no bits above the low Bits
      // bits, and every low bit is either kept by the mask or already known
      // to be zero in Neg's input.
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known;
      DAG.computeKnownBits(Pos.getOperand(0), Known);
      // Same reasoning as for Neg above.
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
5430 
5431 // A subroutine of MatchRotate used once we have found an OR of two opposite
5432 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
5433 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5434 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
5435 // Neg with outer conversions stripped away.
5436 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5437                                        SDValue Neg, SDValue InnerPos,
5438                                        SDValue InnerNeg, unsigned PosOpcode,
5439                                        unsigned NegOpcode, const SDLoc &DL) {
5440   // fold (or (shl x, (*ext y)),
5441   //          (srl x, (*ext (sub 32, y)))) ->
5442   //   (rotl x, y) or (rotr x, (sub 32, y))
5443   //
5444   // fold (or (shl x, (*ext (sub 32, y))),
5445   //          (srl x, (*ext y))) ->
5446   //   (rotr x, y) or (rotl x, (sub 32, y))
5447   EVT VT = Shifted.getValueType();
5448   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5449     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5450     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5451                        HasPos ? Pos : Neg).getNode();
5452   }
5453 
5454   return nullptr;
5455 }
5456 
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Check for truncated rotate.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    // Recurse on the wider pre-truncate operands, then truncate the rotated
    // result back down to this type.
    if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
                         SDValue(Rot, 0)).getNode();
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail
  if (!LHSShift && !RHSShift)
    return nullptr;

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract.  We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (ie if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return nullptr;

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // The two constant shift amounts must sum to the element size for the OR to
  // be a rotate.
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    // Prefer ROTL when available; otherwise emit ROTR with the complementary
    // (SRL-side) amount.
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        // The SRL half was unmasked, so the bits it contributes
        // (AllOnes >> RHSShiftAmt) must stay enabled in the combined mask.
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        // Likewise the unmasked SHL half contributes (AllOnes << LHSShiftAmt).
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try matching with the SHL amount as the "positive" rotate direction ...
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  // ... and then with the SRL amount as the "positive" direction.
  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
5601 
namespace {

/// Represents known origin of an individual byte in load combine pattern. The
/// value of the byte is either constant zero or comes from memory.
struct ByteProvider {
  // For constant zero providers Load is set to nullptr. For memory providers
  // Load represents the node which loads the byte from memory.
  // ByteOffset is the offset of the byte in the value produced by the load.
  LoadSDNode *Load = nullptr;
  unsigned ByteOffset = 0;

  ByteProvider() = default;

  /// Named constructor for a byte loaded from memory by \p Load at
  /// \p ByteOffset within the loaded value.
  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
    return ByteProvider(Load, ByteOffset);
  }

  /// Named constructor for a byte known to be constant zero.
  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }

  bool isConstantZero() const { return !Load; }
  bool isMemory() const { return Load; }

  bool operator==(const ByteProvider &Other) const {
    return Other.Load == Load && Other.ByteOffset == ByteOffset;
  }

private:
  // Instances are only created through the named constructors above.
  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
      : Load(Load), ByteOffset(ByteOffset) {}
};

} // end anonymous namespace
5634 
5635 /// Recursively traverses the expression calculating the origin of the requested
5636 /// byte of the given value. Returns None if the provider can't be calculated.
5637 ///
5638 /// For all the values except the root of the expression verifies that the value
5639 /// has exactly one use and if it's not true return None. This way if the origin
5640 /// of the byte is returned it's guaranteed that the values which contribute to
5641 /// the byte are not used outside of this expression.
5642 ///
5643 /// Because the parts of the expression are not allowed to have more than one
5644 /// use this function iterates over trees, not DAGs. So it never visits the same
5645 /// node more than once.
5646 static const Optional<ByteProvider>
5647 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
5648                       bool Root = false) {
5649   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
5650   if (Depth == 10)
5651     return None;
5652 
5653   if (!Root && !Op.hasOneUse())
5654     return None;
5655 
5656   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
5657   unsigned BitWidth = Op.getValueSizeInBits();
5658   if (BitWidth % 8 != 0)
5659     return None;
5660   unsigned ByteWidth = BitWidth / 8;
5661   assert(Index < ByteWidth && "invalid index requested");
5662   (void) ByteWidth;
5663 
5664   switch (Op.getOpcode()) {
5665   case ISD::OR: {
5666     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
5667     if (!LHS)
5668       return None;
5669     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
5670     if (!RHS)
5671       return None;
5672 
5673     if (LHS->isConstantZero())
5674       return RHS;
5675     if (RHS->isConstantZero())
5676       return LHS;
5677     return None;
5678   }
5679   case ISD::SHL: {
5680     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
5681     if (!ShiftOp)
5682       return None;
5683 
5684     uint64_t BitShift = ShiftOp->getZExtValue();
5685     if (BitShift % 8 != 0)
5686       return None;
5687     uint64_t ByteShift = BitShift / 8;
5688 
5689     return Index < ByteShift
5690                ? ByteProvider::getConstantZero()
5691                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
5692                                        Depth + 1);
5693   }
5694   case ISD::ANY_EXTEND:
5695   case ISD::SIGN_EXTEND:
5696   case ISD::ZERO_EXTEND: {
5697     SDValue NarrowOp = Op->getOperand(0);
5698     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
5699     if (NarrowBitWidth % 8 != 0)
5700       return None;
5701     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5702 
5703     if (Index >= NarrowByteWidth)
5704       return Op.getOpcode() == ISD::ZERO_EXTEND
5705                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5706                  : None;
5707     return calculateByteProvider(NarrowOp, Index, Depth + 1);
5708   }
5709   case ISD::BSWAP:
5710     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
5711                                  Depth + 1);
5712   case ISD::LOAD: {
5713     auto L = cast<LoadSDNode>(Op.getNode());
5714     if (L->isVolatile() || L->isIndexed())
5715       return None;
5716 
5717     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
5718     if (NarrowBitWidth % 8 != 0)
5719       return None;
5720     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5721 
5722     if (Index >= NarrowByteWidth)
5723       return L->getExtensionType() == ISD::ZEXTLOAD
5724                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5725                  : None;
5726     return ByteProvider::getMemory(L, Index);
5727   }
5728   }
5729 
5730   return None;
5731 }
5732 
5733 /// Match a pattern where a wide type scalar value is loaded by several narrow
5734 /// loads and combined by shifts and ors. Fold it into a single load or a load
5735 /// and a BSWAP if the targets supports it.
5736 ///
5737 /// Assuming little endian target:
5738 ///  i8 *a = ...
5739 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
5740 /// =>
5741 ///  i32 val = *((i32)a)
5742 ///
5743 ///  i8 *a = ...
5744 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
5745 /// =>
5746 ///  i32 val = BSWAP(*((i32)a))
5747 ///
5748 /// TODO: This rule matches complex patterns with OR node roots and doesn't
5749 /// interact well with the worklist mechanism. When a part of the pattern is
5750 /// updated (e.g. one of the loads) its direct users are put into the worklist,
5751 /// but the root node of the pattern which triggers the load combine is not
5752 /// necessarily a direct user of the changed node. For example, once the address
5753 /// of t28 load is reassociated load combine won't be triggered:
5754 ///             t25: i32 = add t4, Constant:i32<2>
5755 ///           t26: i64 = sign_extend t25
5756 ///        t27: i64 = add t2, t26
5757 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
5758 ///     t29: i32 = zero_extend t28
5759 ///   t32: i32 = shl t29, Constant:i8<8>
5760 /// t33: i32 = or t23, t32
5761 /// As a possible fix visitLoad can check if the load can be a part of a load
5762 /// combine pattern and add corresponding OR roots to the worklist.
5763 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
5764   assert(N->getOpcode() == ISD::OR &&
5765          "Can only match load combining against OR nodes");
5766 
5767   // Handles simple types only
5768   EVT VT = N->getValueType(0);
5769   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
5770     return SDValue();
5771   unsigned ByteWidth = VT.getSizeInBits() / 8;
5772 
5773   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5774   // Before legalize we can introduce too wide illegal loads which will be later
5775   // split into legal sized loads. This enables us to combine i64 load by i8
5776   // patterns to a couple of i32 loads on 32 bit targets.
5777   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
5778     return SDValue();
5779 
5780   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
5781     unsigned BW, unsigned i) { return i; };
5782   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
5783     unsigned BW, unsigned i) { return BW - i - 1; };
5784 
5785   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
5786   auto MemoryByteOffset = [&] (ByteProvider P) {
5787     assert(P.isMemory() && "Must be a memory byte provider");
5788     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
5789     assert(LoadBitWidth % 8 == 0 &&
5790            "can only analyze providers for individual bytes not bit");
5791     unsigned LoadByteWidth = LoadBitWidth / 8;
5792     return IsBigEndianTarget
5793             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
5794             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
5795   };
5796 
5797   Optional<BaseIndexOffset> Base;
5798   SDValue Chain;
5799 
5800   SmallPtrSet<LoadSDNode *, 8> Loads;
5801   Optional<ByteProvider> FirstByteProvider;
5802   int64_t FirstOffset = INT64_MAX;
5803 
5804   // Check if all the bytes of the OR we are looking at are loaded from the same
5805   // base address. Collect bytes offsets from Base address in ByteOffsets.
5806   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
5807   for (unsigned i = 0; i < ByteWidth; i++) {
5808     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
5809     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
5810       return SDValue();
5811 
5812     LoadSDNode *L = P->Load;
5813     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
5814            "Must be enforced by calculateByteProvider");
5815     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
5816 
5817     // All loads must share the same chain
5818     SDValue LChain = L->getChain();
5819     if (!Chain)
5820       Chain = LChain;
5821     else if (Chain != LChain)
5822       return SDValue();
5823 
5824     // Loads must share the same base address
5825     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
5826     int64_t ByteOffsetFromBase = 0;
5827     if (!Base)
5828       Base = Ptr;
5829     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
5830       return SDValue();
5831 
5832     // Calculate the offset of the current byte from the base address
5833     ByteOffsetFromBase += MemoryByteOffset(*P);
5834     ByteOffsets[i] = ByteOffsetFromBase;
5835 
5836     // Remember the first byte load
5837     if (ByteOffsetFromBase < FirstOffset) {
5838       FirstByteProvider = P;
5839       FirstOffset = ByteOffsetFromBase;
5840     }
5841 
5842     Loads.insert(L);
5843   }
5844   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
5845          "memory, so there must be at least one load which produces the value");
5846   assert(Base && "Base address of the accessed memory location must be set");
5847   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5848 
5849   // Check if the bytes of the OR we are looking at match with either big or
5850   // little endian value load
5851   bool BigEndian = true, LittleEndian = true;
5852   for (unsigned i = 0; i < ByteWidth; i++) {
5853     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5854     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5855     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5856     if (!BigEndian && !LittleEndian)
5857       return SDValue();
5858   }
5859   assert((BigEndian != LittleEndian) && "should be either or");
5860   assert(FirstByteProvider && "must be set");
5861 
5862   // Ensure that the first byte is loaded from zero offset of the first load.
5863   // So the combined value can be loaded from the first load address.
5864   if (MemoryByteOffset(*FirstByteProvider) != 0)
5865     return SDValue();
5866   LoadSDNode *FirstLoad = FirstByteProvider->Load;
5867 
5868   // The node we are looking at matches with the pattern, check if we can
5869   // replace it with a single load and bswap if needed.
5870 
5871   // If the load needs byte swap check if the target supports it
5872   bool NeedsBswap = IsBigEndianTarget != BigEndian;
5873 
5874   // Before legalize we can introduce illegal bswaps which will be later
5875   // converted to an explicit bswap sequence. This way we end up with a single
5876   // load and byte shuffling instead of several loads and byte shuffling.
5877   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
5878     return SDValue();
5879 
5880   // Check that a load of the wide type is both allowed and fast on the target
5881   bool Fast = false;
5882   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
5883                                         VT, FirstLoad->getAddressSpace(),
5884                                         FirstLoad->getAlignment(), &Fast);
5885   if (!Allowed || !Fast)
5886     return SDValue();
5887 
5888   SDValue NewLoad =
5889       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
5890                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
5891 
5892   // Transfer chain users from old loads to the new load.
5893   for (LoadSDNode *L : Loads)
5894     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
5895 
5896   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
5897 }
5898 
5899 // If the target has andn, bsl, or a similar bit-select instruction,
5900 // we want to unfold masked merge, with canonical pattern of:
5901 //   |        A  |  |B|
5902 //   ((x ^ y) & m) ^ y
5903 //    |  D  |
5904 // Into:
5905 //   (x & m) | (y & ~m)
5906 // If y is a constant, and the 'andn' does not work with immediates,
5907 // we unfold into a different pattern:
5908 //   ~(~x & m) & (m | y)
5909 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
5910 //       the very least that breaks andnpd / andnps patterns, and because those
5911 //       patterns are simplified in IR and shouldn't be created in the DAG
SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
  assert(N->getOpcode() == ISD::XOR);

  // Don't touch 'not' (i.e. where y = -1).
  if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // There are 3 commutable operators in the pattern,
  // so we have to deal with 8 possible variants of the basic pattern.
  SDValue X, Y, M;
  // Try to match  And == ((X ^ Y) & M)  where the inner xor is operand
  // 'XorIdx' of the 'and', and 'Other' is the outer xor's other operand
  // (the Y of the merge). On success, writes X, Y, M into the captures.
  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
    if (And.getOpcode() != ISD::AND || !And.hasOneUse())
      return false;
    SDValue Xor = And.getOperand(XorIdx);
    if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
      return false;
    SDValue Xor0 = Xor.getOperand(0);
    SDValue Xor1 = Xor.getOperand(1);
    // Don't touch 'not' (i.e. where y = -1).
    if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
      return false;
    // Canonicalize so that Xor1 is the xor operand matching 'Other'.
    if (Other == Xor0)
      std::swap(Xor0, Xor1);
    if (Other != Xor1)
      return false;
    X = Xor0;
    Y = Xor1;
    // The mask is the 'and' operand that is not the xor.
    M = And.getOperand(XorIdx ? 0 : 1);
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Both the outer xor and the inner and are commutable: try all four
  // operand arrangements.
  if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
      !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
    return SDValue();

  // Don't do anything if the mask is constant. This should not be reachable.
  // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it, too.
  if (isa<ConstantSDNode>(M.getNode()))
    return SDValue();

  // We can transform if the target has AndNot
  if (!TLI.hasAndNot(M))
    return SDValue();

  SDLoc DL(N);

  // If Y is a constant, check that 'andn' works with immediates.
  if (!TLI.hasAndNot(Y)) {
    assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
    // If not, we need to do a bit more work to make sure andn is still used.
    // Emit  ~(~x & m) & (m | y)  so the and-not is applied to the variable X.
    SDValue NotX = DAG.getNOT(DL, X, VT);
    SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
    SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
    SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
    return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
  }

  // General case: unfold into  (x & m) | (y & ~m).
  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
  SDValue NotM = DAG.getNOT(DL, M, VT);
  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);

  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}
5980 
// Combine an ISD::XOR node: constant folding, canonicalization, setcc
// inversion, de-Morgan style rewrites, abs-pattern recognition, and the
// masked-merge unfold. Folds are ordered; earlier ones may subsume later ones.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1, N->getFlags()))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    // Only rewrite if the inverted condition code is legal (or we have not
    // legalized operations yet).
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL(N0);
    // Invert the boolean before the extension instead of after it.
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
                    DAG.getConstant(1, DL, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      // De Morgan: flip the logic op and push the 'not' onto both operands.
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      // Same De Morgan rewrite; the constant 'not' folds away later.
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    // The add and the sra may appear on either side of the xor.
    SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
    SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
      SDValue S0 = S.getOperand(0);
      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        // Only matches when the shift amount is exactly the sign-bit index.
        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // fold (xor x, x) -> 0
  // (helper accounts for the legalization state when materializing zero)
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
  if (SDValue MM = unfoldMaskedMerge(N))
    return MM;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
6152 
/// Handle transforms common to the three shifts (shl/srl/sra), when the shift
/// amount is a constant: try to commute the shift with a one-use binop whose
/// RHS is also constant, producing (binop (shift x), (shift c)).
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  // Duplicating a multi-use binop would grow the DAG; bail out.
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is a copy/select. Enable this in other cases when we figure out that it
  // is exactly profitable.
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
                 BinOpLHSVal->getOpcode() == ISD::SRA ||
                 BinOpLHSVal->getOpcode() == ISD::SRL;
  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal->getOpcode() == ISD::SELECT;

  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
      !isCopyOrSelect)
    return SDValue();

  // NOTE(review): this bails when the shift N itself is single-use and the
  // binop input is a copy/select; the condition reads as if it were meant to
  // be !N->hasOneUse() — confirm the intended profitability heuristic.
  if (isCopyOrSelect && N->hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  if (!TLI.isDesirableToCommuteWithShift(N, Level))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
6230 
6231 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
6232   assert(N->getOpcode() == ISD::TRUNCATE);
6233   assert(N->getOperand(0).getOpcode() == ISD::AND);
6234 
6235   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
6236   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
6237     SDValue N01 = N->getOperand(0).getOperand(1);
6238     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
6239       SDLoc DL(N);
6240       EVT TruncVT = N->getValueType(0);
6241       SDValue N00 = N->getOperand(0).getOperand(0);
6242       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
6243       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
6244       AddToWorklist(Trunc00.getNode());
6245       AddToWorklist(Trunc01.getNode());
6246       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
6247     }
6248   }
6249 
6250   return SDValue();
6251 }
6252 
// Combine an ISD::ROTL / ISD::ROTR node: drop zero rotates, reduce
// out-of-range rotate amounts, distribute truncate through 'and' on the
// amount, and merge chained rotates of the same value.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullConstantOrNullSplatConstant(N1))
    return N0;

  // fold (rot x, c) -> (rot x, c % BitSize)
  if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    if (Cst->getAPIntValue().uge(Bitsize)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
      return DAG.getNode(N->getOpcode(), dl, VT, N0,
                         DAG.getConstant(RotAmt, dl, N1.getValueType()));
    }
  }

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      // Same rotate direction: amounts add; opposite directions: subtract.
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift =
              DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
        // Normalize the combined amount into [0, Bitsize).
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
            BitsizeC.getNode());
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}
6302 
// Combine an ISD::SHL node: constant folding, degenerate-shift handling,
// merging of shift chains, commuting shl through ext/srl/add/or/mul, and
// demanded-bits simplification. Folds are order-sensitive.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        // Only valid when the setcc's true value is all-ones, so shifting the
        // mask is equivalent to shifting the whole 'and'.
        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstantOrNullSplatConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    // Widen by one bit before adding so the sum cannot wrap.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // Outer shift must cover at least the bits added by the extension.
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N0);
        APInt Sum = c1 + c2;
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        // Only when the shl amount matches the srl amount does the pair just
        // clear low bits, which commutes with the zext.
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask of the bits that survive the srl; adjusted below to track the
        // net shift direction.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
      TLI.isDesirableToCommuteWithShift(N, Level)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    // Only commit if the shifted constant folded to a constant.
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}
6541 
// Combine an ISD::SRA node: constant folding, sign-extend-in-reg recognition,
// chained-sra merging, commuting with shl/trunc, and conversion to SRL when
// the sign bit is known zero.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1>>c2 (arithmetic)
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
  // fold (sra x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    SmallVector<SDValue, 16> ShiftValues;

    // Per-element: sum the amounts (widened to avoid overflow) and clamp to
    // OpSizeInBits - 1, since an sra never shifts past the sign bit.
    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      SDValue ShiftValue;
      if (VT.isVector())
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated-to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        // Logical shift right of the pre-shl value; the sign bit is restored
        // by the sign_extend of the truncated result below.
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      // The inner shift must discard exactly the bits the trunc drops.
      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
6707 
/// Fold a logical shift right (SRL) node. Returns the replacement value, or
/// an empty SDValue if no combine applies.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (isNullConstantOrNullSplatConstant(N0))
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;

  // Push the shift into a select of constants when profitable.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    // The shift amounts are zero-extended to a common width plus one extra
    // bit so the sum c1 + c2 cannot wrap before the range comparison.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift,
      // i.e. the inner shift discards exactly the bits the truncate removes.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        // Combined shift amount covers the whole inner value: result is 0.
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  // where cst2 is the all-ones value shifted right by c.
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the high bits: after the narrow shift only the low
      // (OpSizeInBits - ShiftAmt) bits of the any_extend result are defined.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), log2(size(x))), which tests whether x is zero,
  // into simpler nodes when we can reason about the input bits.
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known;
    DAG.computeKnownBits(N0.getOperand(0), Known);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
6927 
6928 SDValue DAGCombiner::visitABS(SDNode *N) {
6929   SDValue N0 = N->getOperand(0);
6930   EVT VT = N->getValueType(0);
6931 
6932   // fold (abs c1) -> c2
6933   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6934     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
6935   // fold (abs (abs x)) -> (abs x)
6936   if (N0.getOpcode() == ISD::ABS)
6937     return N0;
6938   // fold (abs x) -> x iff not-negative
6939   if (DAG.SignBitIsZero(N0))
6940     return N0;
6941   return SDValue();
6942 }
6943 
6944 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
6945   SDValue N0 = N->getOperand(0);
6946   EVT VT = N->getValueType(0);
6947 
6948   // fold (bswap c1) -> c2
6949   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6950     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
6951   // fold (bswap (bswap x)) -> x
6952   if (N0.getOpcode() == ISD::BSWAP)
6953     return N0->getOperand(0);
6954   return SDValue();
6955 }
6956 
6957 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
6958   SDValue N0 = N->getOperand(0);
6959   EVT VT = N->getValueType(0);
6960 
6961   // fold (bitreverse c1) -> c2
6962   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6963     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
6964   // fold (bitreverse (bitreverse x)) -> x
6965   if (N0.getOpcode() == ISD::BITREVERSE)
6966     return N0.getOperand(0);
6967   return SDValue();
6968 }
6969 
6970 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
6971   SDValue N0 = N->getOperand(0);
6972   EVT VT = N->getValueType(0);
6973 
6974   // fold (ctlz c1) -> c2
6975   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6976     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
6977 
6978   // If the value is known never to be zero, switch to the undef version.
6979   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
6980     if (DAG.isKnownNeverZero(N0))
6981       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6982   }
6983 
6984   return SDValue();
6985 }
6986 
6987 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
6988   SDValue N0 = N->getOperand(0);
6989   EVT VT = N->getValueType(0);
6990 
6991   // fold (ctlz_zero_undef c1) -> c2
6992   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6993     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6994   return SDValue();
6995 }
6996 
6997 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
6998   SDValue N0 = N->getOperand(0);
6999   EVT VT = N->getValueType(0);
7000 
7001   // fold (cttz c1) -> c2
7002   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7003     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
7004 
7005   // If the value is known never to be zero, switch to the undef version.
7006   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
7007     if (DAG.isKnownNeverZero(N0))
7008       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7009   }
7010 
7011   return SDValue();
7012 }
7013 
7014 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
7015   SDValue N0 = N->getOperand(0);
7016   EVT VT = N->getValueType(0);
7017 
7018   // fold (cttz_zero_undef c1) -> c2
7019   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7020     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7021   return SDValue();
7022 }
7023 
7024 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
7025   SDValue N0 = N->getOperand(0);
7026   EVT VT = N->getValueType(0);
7027 
7028   // fold (ctpop c1) -> c2
7029   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7030     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
7031   return SDValue();
7032 }
7033 
7034 /// Generate Min/Max node
7035 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
7036                                    SDValue RHS, SDValue True, SDValue False,
7037                                    ISD::CondCode CC, const TargetLowering &TLI,
7038                                    SelectionDAG &DAG) {
7039   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
7040     return SDValue();
7041 
7042   switch (CC) {
7043   case ISD::SETOLT:
7044   case ISD::SETOLE:
7045   case ISD::SETLT:
7046   case ISD::SETLE:
7047   case ISD::SETULT:
7048   case ISD::SETULE: {
7049     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
7050     if (TLI.isOperationLegal(Opcode, VT))
7051       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7052     return SDValue();
7053   }
7054   case ISD::SETOGT:
7055   case ISD::SETOGE:
7056   case ISD::SETGT:
7057   case ISD::SETGE:
7058   case ISD::SETUGT:
7059   case ISD::SETUGE: {
7060     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
7061     if (TLI.isOperationLegal(Opcode, VT))
7062       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7063     return SDValue();
7064   }
7065   default:
7066     return SDValue();
7067   }
7068 }
7069 
/// Try to simplify (select Cond, C1, C2) where both arms are integer
/// constants, turning it into a zext/sext/xor of the condition or an add of
/// an extended condition. Returns an empty SDValue if no fold applies.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  if (!VT.isInteger())
    return SDValue();

  // Both select arms must be scalar integer constants.
  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // For any constants that differ by 1, we can transform the select into an
    // extend and add. Use a target hook because some targets may prefer to
    // transform in the other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    // If the boolean already has the result width, the xor is the answer;
    // otherwise zero-extend or truncate it to the select's type.
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}
7163 
/// Fold a SELECT node: constant conditions, boolean-logic rewrites for i1
/// selects, select-chain normalization, and min/max recognition via SETCC.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;

  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ? N1 : N2;
  }

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  // Try folds where both arms are integer constants.
  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      // Only keep the rewrite if the target wants select sequences or the
      // inner select already existed in the DAG (i.e. it has other uses).
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      // Same profitability rule as the AND case above.
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  if (VT0 == MVT::i1) {
    // select (not Cond), N1, N2 -> select Cond, N2, N1
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
  }

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: This should be checking for no signed zeros on individual
    // operands, as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.NoSignedZerosFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      if (SDValue FMinMax = combineMinNumMaxNum(
              DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    // Prefer SELECT_CC when the target supports it (or before legalization
    // when it is at least custom-lowered).
    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
                         N0.getOperand(1), N1, N2, N0.getOperand(2));
    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
7317 
7318 static
7319 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
7320   SDLoc DL(N);
7321   EVT LoVT, HiVT;
7322   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
7323 
7324   // Split the inputs.
7325   SDValue Lo, Hi, LL, LH, RL, RH;
7326   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
7327   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
7328 
7329   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
7330   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
7331 
7332   return std::make_pair(Lo, Hi);
7333 }
7334 
7335 // This function assumes all the vselect's arguments are CONCAT_VECTOR
7336 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
7337 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
7338   SDLoc DL(N);
7339   SDValue Cond = N->getOperand(0);
7340   SDValue LHS = N->getOperand(1);
7341   SDValue RHS = N->getOperand(2);
7342   EVT VT = N->getValueType(0);
7343   int NumElems = VT.getVectorNumElements();
7344   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
7345          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
7346          Cond.getOpcode() == ISD::BUILD_VECTOR);
7347 
7348   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
7349   // binary ones here.
7350   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
7351     return SDValue();
7352 
7353   // We're sure we have an even number of elements due to the
7354   // concat_vectors we have as arguments to vselect.
7355   // Skip BV elements until we find one that's not an UNDEF
7356   // After we find an UNDEF element, keep looping until we get to half the
7357   // length of the BV and see if all the non-undef nodes are the same.
7358   ConstantSDNode *BottomHalf = nullptr;
7359   for (int i = 0; i < NumElems / 2; ++i) {
7360     if (Cond->getOperand(i)->isUndef())
7361       continue;
7362 
7363     if (BottomHalf == nullptr)
7364       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7365     else if (Cond->getOperand(i).getNode() != BottomHalf)
7366       return SDValue();
7367   }
7368 
7369   // Do the same for the second half of the BuildVector
7370   ConstantSDNode *TopHalf = nullptr;
7371   for (int i = NumElems / 2; i < NumElems; ++i) {
7372     if (Cond->getOperand(i)->isUndef())
7373       continue;
7374 
7375     if (TopHalf == nullptr)
7376       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7377     else if (Cond->getOperand(i).getNode() != TopHalf)
7378       return SDValue();
7379   }
7380 
7381   assert(TopHalf && BottomHalf &&
7382          "One half of the selector was all UNDEFs and the other was all the "
7383          "same value. This should have been addressed before this function.");
7384   return DAG.getNode(
7385       ISD::CONCAT_VECTORS, DL, VT,
7386       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
7387       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
7388 }
7389 
/// Split a masked scatter whose mask is produced by a SETCC before type
/// legalization, so the SETCC is split together with the data instead of
/// being unrolled into scalar compares by the type legalizer.
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
  // Only profitable before type legalization; afterwards types are final.
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Data  = MSC->getValue();
  SDLoc DL(N);

  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();
  SDValue MaskLo, MaskHi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  // NOTE(review): LoVT/HiVT are computed but not used below.
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  SDValue Scale = MSC->getScale();
  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  // NOTE(review): both halves share a single MMO sized for the low half —
  // presumably acceptable since scatter pointer info is imprecise; confirm.
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MSC->getPointerInfo(),
                          MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
                          Alignment, MSC->getAAInfo(), MSC->getRanges());

  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
  SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
                                    DataLo.getValueType(), DL, OpsLo, MMO);

  // The order of the Scatter operation after split is well defined. The "Hi"
  // part comes after the "Lo". So these two operations should be chained one
  // after another.
  SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
                              DL, OpsHi, MMO);
}
7448 
7449 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
7450   if (Level >= AfterLegalizeTypes)
7451     return SDValue();
7452 
7453   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
7454   SDValue Mask = MST->getMask();
7455   SDValue Data  = MST->getValue();
7456   EVT VT = Data.getValueType();
7457   SDLoc DL(N);
7458 
7459   // If the MSTORE data type requires splitting and the mask is provided by a
7460   // SETCC, then split both nodes and its operands before legalization. This
7461   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7462   // and enables future optimizations (e.g. min/max pattern matching on X86).
7463   if (Mask.getOpcode() == ISD::SETCC) {
7464     // Check if any splitting is required.
7465     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7466         TargetLowering::TypeSplitVector)
7467       return SDValue();
7468 
7469     SDValue MaskLo, MaskHi, Lo, Hi;
7470     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7471 
7472     SDValue Chain = MST->getChain();
7473     SDValue Ptr   = MST->getBasePtr();
7474 
7475     EVT MemoryVT = MST->getMemoryVT();
7476     unsigned Alignment = MST->getOriginalAlignment();
7477 
7478     // if Alignment is equal to the vector size,
7479     // take the half of it for the second part
7480     unsigned SecondHalfAlignment =
7481       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
7482 
7483     EVT LoMemVT, HiMemVT;
7484     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7485 
7486     SDValue DataLo, DataHi;
7487     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7488 
7489     MachineMemOperand *MMO = DAG.getMachineFunction().
7490       getMachineMemOperand(MST->getPointerInfo(),
7491                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
7492                            Alignment, MST->getAAInfo(), MST->getRanges());
7493 
7494     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
7495                             MST->isTruncatingStore(),
7496                             MST->isCompressingStore());
7497 
7498     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7499                                      MST->isCompressingStore());
7500     unsigned HiOffset = LoMemVT.getStoreSize();
7501 
7502     MMO = DAG.getMachineFunction().getMachineMemOperand(
7503         MST->getPointerInfo().getWithOffset(HiOffset),
7504         MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
7505         MST->getAAInfo(), MST->getRanges());
7506 
7507     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
7508                             MST->isTruncatingStore(),
7509                             MST->isCompressingStore());
7510 
7511     AddToWorklist(Lo.getNode());
7512     AddToWorklist(Hi.getNode());
7513 
7514     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
7515   }
7516   return SDValue();
7517 }
7518 
/// Split a masked gather whose mask is produced by a SETCC before type
/// legalization, so the SETCC is split together with the result instead of
/// being unrolled into scalar compares by the type legalizer.
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
  // Only profitable before type legalization; afterwards types are final.
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
  SDValue Mask = MGT->getMask();
  SDLoc DL(N);

  // If the MGATHER result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).

  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  EVT VT = N->getValueType(0);

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
      TargetLowering::TypeSplitVector)
    return SDValue();

  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  SDValue PassThru = MGT->getPassThru();
  SDValue PassThruLo, PassThruHi;
  std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);

  SDValue Chain = MGT->getChain();
  EVT MemoryVT = MGT->getMemoryVT();
  unsigned Alignment = MGT->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue Scale = MGT->getScale();
  SDValue BasePtr = MGT->getBasePtr();
  SDValue Index = MGT->getIndex();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);

  // NOTE(review): both halves share a single MMO sized for the low half —
  // presumably acceptable since gather pointer info is imprecise; confirm.
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MGT->getPointerInfo(),
                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
                          Alignment, MGT->getAAInfo(), MGT->getRanges());

  SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
                           MMO);

  SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
                           MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Build a factor node to remember that this load is independent of the
  // other one.
  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                      Hi.getValue(1));

  // Legalized the chain result - switch anything that used the old chain to
  // use the new one.
  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);

  // Reassemble the full-width result from the two half-gathers.
  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);

  SDValue RetOps[] = { GatherRes, Chain };
  return DAG.getMergeValues(RetOps, DL);
}
7595 
7596 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
7597   if (Level >= AfterLegalizeTypes)
7598     return SDValue();
7599 
7600   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
7601   SDValue Mask = MLD->getMask();
7602   SDLoc DL(N);
7603 
7604   // If the MLOAD result requires splitting and the mask is provided by a
7605   // SETCC, then split both nodes and its operands before legalization. This
7606   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7607   // and enables future optimizations (e.g. min/max pattern matching on X86).
7608   if (Mask.getOpcode() == ISD::SETCC) {
7609     EVT VT = N->getValueType(0);
7610 
7611     // Check if any splitting is required.
7612     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7613         TargetLowering::TypeSplitVector)
7614       return SDValue();
7615 
7616     SDValue MaskLo, MaskHi, Lo, Hi;
7617     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7618 
7619     SDValue PassThru = MLD->getPassThru();
7620     SDValue PassThruLo, PassThruHi;
7621     std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7622 
7623     EVT LoVT, HiVT;
7624     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
7625 
7626     SDValue Chain = MLD->getChain();
7627     SDValue Ptr   = MLD->getBasePtr();
7628     EVT MemoryVT = MLD->getMemoryVT();
7629     unsigned Alignment = MLD->getOriginalAlignment();
7630 
7631     // if Alignment is equal to the vector size,
7632     // take the half of it for the second part
7633     unsigned SecondHalfAlignment =
7634       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
7635          Alignment/2 : Alignment;
7636 
7637     EVT LoMemVT, HiMemVT;
7638     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7639 
7640     MachineMemOperand *MMO = DAG.getMachineFunction().
7641     getMachineMemOperand(MLD->getPointerInfo(),
7642                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
7643                          Alignment, MLD->getAAInfo(), MLD->getRanges());
7644 
7645     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
7646                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7647 
7648     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7649                                      MLD->isExpandingLoad());
7650     unsigned HiOffset = LoMemVT.getStoreSize();
7651 
7652     MMO = DAG.getMachineFunction().getMachineMemOperand(
7653         MLD->getPointerInfo().getWithOffset(HiOffset),
7654         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
7655         MLD->getAAInfo(), MLD->getRanges());
7656 
7657     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
7658                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7659 
7660     AddToWorklist(Lo.getNode());
7661     AddToWorklist(Hi.getNode());
7662 
7663     // Build a factor node to remember that this load is independent of the
7664     // other one.
7665     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7666                         Hi.getValue(1));
7667 
7668     // Legalized the chain result - switch anything that used the old chain to
7669     // use the new one.
7670     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
7671 
7672     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7673 
7674     SDValue RetOps[] = { LoadRes, Chain };
7675     return DAG.getMergeValues(RetOps, DL);
7676   }
7677   return SDValue();
7678 }
7679 
7680 /// A vector select of 2 constant vectors can be simplified to math/logic to
7681 /// avoid a variable select instruction and possibly avoid constant loads.
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  // Only handle a one-use i1-element condition selecting between two
  // all-constant build_vectors, and only if the target prefers math over a
  // variable select.
  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
      !TLI.convertSelectOfConstantsToMath(VT) ||
      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
      !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
    return SDValue();

  // Check if we can use the condition value to increment/decrement a single
  // constant value. This simplifies a select to an add and removes a constant
  // load/materialization from the general case.
  bool AllAddOne = true;
  bool AllSubOne = true;
  unsigned Elts = VT.getVectorNumElements();
  for (unsigned i = 0; i != Elts; ++i) {
    SDValue N1Elt = N1.getOperand(i);
    SDValue N2Elt = N2.getOperand(i);
    // Undef lanes don't constrain either pattern.
    if (N1Elt.isUndef() || N2Elt.isUndef())
      continue;

    const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
    const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
    if (C1 != C2 + 1)
      AllAddOne = false;
    if (C1 != C2 - 1)
      AllSubOne = false;
  }

  // Further simplifications for the extra-special cases where the constants are
  // all 0 or all -1 should be implemented as folds of these patterns.
  SDLoc DL(N);
  if (AllAddOne || AllSubOne) {
    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
    // (zext i1 gives +1 for true lanes; sext i1 gives -1.)
    auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
  }

  // The general case for select-of-constants:
  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
  // leave that to a machine-specific pass.
  return SDValue();
}
7730 
/// Combine a VSELECT node: trivial folds, integer-abs recognition, setcc
/// widening, concat-vector conversion, and constant-operand folds.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDLoc DL(N);

  // fold (vselect C, X, X) -> X
  if (N1 == N2)
    return N1;

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // "X >= 0 ? X : -X" form (the false arm must be 0 - X).
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    // "X <= 0 ? -X : X" form (the true arm must be 0 - X).
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      // Prefer a native ABS node if the target supports one.
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      // Otherwise expand: Y = sra(X, bits-1); abs = xor(add(X, Y), Y).
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }

    // If this select has a condition (setcc) with narrower operands than the
    // select, try to widen the compare to match the select width.
    // TODO: This should be extended to handle any constant.
    // TODO: This could be extended to handle non-loading patterns, but that
    //       requires thorough testing to avoid regressions.
    if (isNullConstantOrNullSplatConstant(RHS)) {
      EVT NarrowVT = LHS.getValueType();
      EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
      unsigned WideWidth = WideVT.getScalarSizeInBits();
      bool IsSigned = isSignedIntSetCC(CC);
      auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
          SetCCWidth != 1 && SetCCWidth < WideWidth &&
          TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
          TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
        // Both compare operands can be widened for free. The LHS can use an
        // extended load, and the RHS is a constant:
        //   vselect (ext (setcc load(X), C)), N1, N2 -->
        //   vselect (setcc extload(X), C'), N1, N2
        auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
        EVT WideSetCCVT = getSetCCResultType(WideVT);
        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
      }
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}
7830 
/// Combine a SELECT_CC node: fold identical arms, simplify the embedded
/// compare, and otherwise defer to SimplifySelectCC for min/max/abs patterns.
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
                                  CC, SDLoc(N), false)) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent the DAG creation, no setcc node is created in this case
      return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                         SCC.getOperand(2));
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}
7872 
7873 SDValue DAGCombiner::visitSETCC(SDNode *N) {
7874   // setcc is very commonly used as an argument to brcond. This pattern
7875   // also lend itself to numerous combines and, as a result, it is desired
7876   // we keep the argument to a brcond as a setcc as much as possible.
7877   bool PreferSetCC =
7878       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
7879 
7880   SDValue Combined = SimplifySetCC(
7881       N->getValueType(0), N->getOperand(0), N->getOperand(1),
7882       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
7883 
7884   if (!Combined)
7885     return SDValue();
7886 
7887   // If we prefer to have a setcc, and we don't, we'll try our best to
7888   // recreate one using rebuildSetCC.
7889   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
7890     SDValue NewSetCC = rebuildSetCC(Combined);
7891 
7892     // We don't have anything interesting to combine to.
7893     if (NewSetCC.getNode() == N)
7894       return SDValue();
7895 
7896     if (NewSetCC)
7897       return NewSetCC;
7898   }
7899 
7900   return Combined;
7901 }
7902 
7903 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
7904   SDValue LHS = N->getOperand(0);
7905   SDValue RHS = N->getOperand(1);
7906   SDValue Carry = N->getOperand(2);
7907   SDValue Cond = N->getOperand(3);
7908 
7909   // If Carry is false, fold to a regular SETCC.
7910   if (isNullConstant(Carry))
7911     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
7912 
7913   return SDValue();
7914 }
7915 
7916 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
7917 /// a build_vector of constants.
7918 /// This function is called by the DAGCombiner when visiting sext/zext/aext
7919 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
7920 /// Vector extends are not folded if operations are legal; this is to
7921 /// avoid introducing illegal build_vector dag nodes.
static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes,
                                         bool LegalOperations) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
         Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
         && "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  // (getNode constant-folds an extend of a ConstantSDNode.)
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();

  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
  // Only fold vector extends when the wider build_vector won't be illegal.
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() &&
      (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return nullptr;

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();
  SDLoc DL(N);

  for (unsigned i=0; i != NumElts; ++i) {
    SDValue Op = N0->getOperand(i);
    // Undef lanes stay undef in the extended vector.
    if (Op->isUndef()) {
      Elts.push_back(DAG.getUNDEF(SVT));
      continue;
    }

    // This inner DL deliberately shadows the outer one so each constant keeps
    // its own element's debug location.
    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts).getNode();
}
7975 
7976 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
7977 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
7978 // transformation. Returns true if extension are possible and the above
7979 // mentioned transformation is profitable.
7980 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
7981                                     unsigned ExtOpc,
7982                                     SmallVectorImpl<SDNode *> &ExtendNodes,
7983                                     const TargetLowering &TLI) {
7984   bool HasCopyToRegUses = false;
7985   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
7986   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
7987                             UE = N0.getNode()->use_end();
7988        UI != UE; ++UI) {
7989     SDNode *User = *UI;
7990     if (User == N)
7991       continue;
7992     if (UI.getUse().getResNo() != N0.getResNo())
7993       continue;
7994     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
7995     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
7996       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
7997       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
7998         // Sign bits will be lost after a zext.
7999         return false;
8000       bool Add = false;
8001       for (unsigned i = 0; i != 2; ++i) {
8002         SDValue UseOp = User->getOperand(i);
8003         if (UseOp == N0)
8004           continue;
8005         if (!isa<ConstantSDNode>(UseOp))
8006           return false;
8007         Add = true;
8008       }
8009       if (Add)
8010         ExtendNodes.push_back(User);
8011       continue;
8012     }
8013     // If truncates aren't free and there are users we can't
8014     // extend, it isn't worthwhile.
8015     if (!isTruncFree)
8016       return false;
8017     // Remember if this value is live-out.
8018     if (User->getOpcode() == ISD::CopyToReg)
8019       HasCopyToRegUses = true;
8020   }
8021 
8022   if (HasCopyToRegUses) {
8023     bool BothLiveOut = false;
8024     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8025          UI != UE; ++UI) {
8026       SDUse &Use = UI.getUse();
8027       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
8028         BothLiveOut = true;
8029         break;
8030       }
8031     }
8032     if (BothLiveOut)
8033       // Both unextended and extended values are live out. There had better be
8034       // a good reason for the transformation.
8035       return ExtendNodes.size();
8036   }
8037   return true;
8038 }
8039 
8040 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8041                                   SDValue OrigLoad, SDValue ExtLoad,
8042                                   ISD::NodeType ExtType) {
8043   // Extend SetCC uses if necessary.
8044   SDLoc DL(ExtLoad);
8045   for (SDNode *SetCC : SetCCs) {
8046     SmallVector<SDValue, 4> Ops;
8047 
8048     for (unsigned j = 0; j != 2; ++j) {
8049       SDValue SOp = SetCC->getOperand(j);
8050       if (SOp == OrigLoad)
8051         Ops.push_back(ExtLoad);
8052       else
8053         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8054     }
8055 
8056     Ops.push_back(SetCC->getOperand(2));
8057     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8058   }
8059 }
8060 
8061 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Only plain, unindexed, single-use, non-volatile loads of power-of-two
  // vectors qualify, and the target must want the split.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // Collect SETCC users of the load that can be extended alongside it.
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  // Emit NumSplits extending loads at consecutive offsets from the base.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    // Alignment of each piece is limited by its offset from the base.
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
8157 
8158 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8159 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
8160 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
8161   assert(N->getOpcode() == ISD::ZERO_EXTEND);
8162   EVT VT = N->getValueType(0);
8163 
8164   // and/or/xor
8165   SDValue N0 = N->getOperand(0);
8166   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8167         N0.getOpcode() == ISD::XOR) ||
8168       N0.getOperand(1).getOpcode() != ISD::Constant ||
8169       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
8170     return SDValue();
8171 
8172   // shl/shr
8173   SDValue N1 = N0->getOperand(0);
8174   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
8175       N1.getOperand(1).getOpcode() != ISD::Constant ||
8176       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
8177     return SDValue();
8178 
8179   // load
8180   if (!isa<LoadSDNode>(N1.getOperand(0)))
8181     return SDValue();
8182   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
8183   EVT MemVT = Load->getMemoryVT();
8184   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
8185       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
8186     return SDValue();
8187 
8188 
8189   // If the shift op is SHL, the logic op must be AND, otherwise the result
8190   // will be wrong.
8191   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
8192     return SDValue();
8193 
8194   if (!N0.hasOneUse() || !N1.hasOneUse())
8195     return SDValue();
8196 
8197   SmallVector<SDNode*, 4> SetCCs;
8198   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
8199                                ISD::ZERO_EXTEND, SetCCs, TLI))
8200     return SDValue();
8201 
8202   // Actually do the transformation.
8203   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
8204                                    Load->getChain(), Load->getBasePtr(),
8205                                    Load->getMemoryVT(), Load->getMemOperand());
8206 
8207   SDLoc DL1(N1);
8208   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
8209                               N1.getOperand(1));
8210 
8211   APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8212   Mask = Mask.zext(VT.getSizeInBits());
8213   SDLoc DL0(N0);
8214   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
8215                             DAG.getConstant(Mask, DL0, VT));
8216 
8217   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8218   CombineTo(N, And);
8219   if (SDValue(Load, 0).hasOneUse()) {
8220     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
8221   } else {
8222     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
8223                                 Load->getValueType(0), ExtLoad);
8224     CombineTo(Load, Trunc, ExtLoad.getValue(1));
8225   }
8226   return SDValue(N,0); // Return N so it doesn't get rechecked!
8227 }
8228 
8229 /// If we're narrowing or widening the result of a vector select and the final
8230 /// size is the same size as a setcc (compare) feeding the select, then try to
8231 /// apply the cast operation to the select's operands because matching vector
8232 /// sizes for a select condition and other operands should be more efficient.
8233 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
8234   unsigned CastOpcode = Cast->getOpcode();
8235   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
8236           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
8237           CastOpcode == ISD::FP_ROUND) &&
8238          "Unexpected opcode for vector select narrowing/widening");
8239 
8240   // We only do this transform before legal ops because the pattern may be
8241   // obfuscated by target-specific operations after legalization. Do not create
8242   // an illegal select op, however, because that may be difficult to lower.
8243   EVT VT = Cast->getValueType(0);
8244   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
8245     return SDValue();
8246 
8247   SDValue VSel = Cast->getOperand(0);
8248   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
8249       VSel.getOperand(0).getOpcode() != ISD::SETCC)
8250     return SDValue();
8251 
8252   // Does the setcc have the same vector size as the casted select?
8253   SDValue SetCC = VSel.getOperand(0);
8254   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
8255   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
8256     return SDValue();
8257 
8258   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
8259   SDValue A = VSel.getOperand(1);
8260   SDValue B = VSel.getOperand(2);
8261   SDValue CastA, CastB;
8262   SDLoc DL(Cast);
8263   if (CastOpcode == ISD::FP_ROUND) {
8264     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8265     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
8266     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
8267   } else {
8268     CastA = DAG.getNode(CastOpcode, DL, VT, A);
8269     CastB = DAG.getNode(CastOpcode, DL, VT, B);
8270   }
8271   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
8272 }
8273 
8274 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8275 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8276 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
8277                                      const TargetLowering &TLI, EVT VT,
8278                                      bool LegalOperations, SDNode *N,
8279                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
8280   SDNode *N0Node = N0.getNode();
8281   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
8282                                                    : ISD::isZEXTLoad(N0Node);
8283   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
8284       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
8285     return {};
8286 
8287   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8288   EVT MemVT = LN0->getMemoryVT();
8289   if ((LegalOperations || LN0->isVolatile()) &&
8290       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
8291     return {};
8292 
8293   SDValue ExtLoad =
8294       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8295                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
8296   Combiner.CombineTo(N, ExtLoad);
8297   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8298   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8299 }
8300 
8301 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8302 // Only generate vector extloads when 1) they're legal, and 2) they are
8303 // deemed desirable by the target.
8304 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
8305                                   const TargetLowering &TLI, EVT VT,
8306                                   bool LegalOperations, SDNode *N, SDValue N0,
8307                                   ISD::LoadExtType ExtLoadType,
8308                                   ISD::NodeType ExtOpc) {
8309   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
8310       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
8311       ((LegalOperations || VT.isVector() ||
8312         cast<LoadSDNode>(N0)->isVolatile()) &&
8313        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
8314     return {};
8315 
8316   bool DoXform = true;
8317   SmallVector<SDNode *, 4> SetCCs;
8318   if (!N0.hasOneUse())
8319     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
8320   if (VT.isVector())
8321     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
8322   if (!DoXform)
8323     return {};
8324 
8325   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8326   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8327                                    LN0->getBasePtr(), N0.getValueType(),
8328                                    LN0->getMemOperand());
8329   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
8330   // If the load value is used only by N, replace it via CombineTo N.
8331   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
8332   Combiner.CombineTo(N, ExtLoad);
8333   if (NoReplaceTrunc) {
8334     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8335   } else {
8336     SDValue Trunc =
8337         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
8338     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8339   }
8340   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8341 }
8342 
8343 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
8344                                        bool LegalOperations) {
8345   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8346           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
8347 
8348   SDValue SetCC = N->getOperand(0);
8349   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
8350       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
8351     return SDValue();
8352 
8353   SDValue X = SetCC.getOperand(0);
8354   SDValue Ones = SetCC.getOperand(1);
8355   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
8356   EVT VT = N->getValueType(0);
8357   EVT XVT = X.getValueType();
8358   // setge X, C is canonicalized to setgt, so we do not need to match that
8359   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
8360   // not require the 'not' op.
8361   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
8362     // Invert and smear/shift the sign bit:
8363     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
8364     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
8365     SDLoc DL(N);
8366     SDValue NotX = DAG.getNOT(DL, X, VT);
8367     SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
8368     auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
8369     return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
8370   }
8371   return SDValue();
8372 }
8373 
/// Combine a SIGN_EXTEND node: constant-fold it, merge it with adjacent
/// extends/truncates, fold it into loads as a sextload, simplify setcc
/// sources, or convert it to a zext when the sign bit is known zero.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Fold an extend of a constant (or build_vector of constants).
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // Bring Op to the destination width before sign-extending in register.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // Try to simplify (sext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    return foldedExt;

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // Try to simplify (sext (sextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    return foldedExt;

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
      LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode*, 4> SetCCs;
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        // Sign-extend the logic-op constant to the destination width.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
        // Capture use information before CombineTo(N, And) rewrites N's uses.
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(DL, VT, N00, N01, CC);

      // If the desired elements are smaller or larger than the source
      // elements, we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVecType) {
        SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
        return DAG.getSExtOrTrunc(VsetCC, DL, VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1)
                             ? DAG.getAllOnesConstant(DL, VT)
                             : DAG.getBoolConstant(true, DL, VT, N00VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
8580 
8581 // isTruncateOf - If N is a truncate of some other value, return true, record
8582 // the value being truncated in Op and which of Op's bits are zero/one in Known.
8583 // This function computes KnownBits to avoid a duplicated call to
8584 // computeKnownBits in the caller.
8585 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
8586                          KnownBits &Known) {
8587   if (N->getOpcode() == ISD::TRUNCATE) {
8588     Op = N->getOperand(0);
8589     DAG.computeKnownBits(Op, Known);
8590     return true;
8591   }
8592 
8593   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
8594       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
8595     return false;
8596 
8597   SDValue Op0 = N->getOperand(0);
8598   SDValue Op1 = N->getOperand(1);
8599   assert(Op0.getValueType() == Op1.getValueType());
8600 
8601   if (isNullConstant(Op0))
8602     Op = Op1;
8603   else if (isNullConstant(Op1))
8604     Op = Op0;
8605   else
8606     return false;
8607 
8608   DAG.computeKnownBits(Op, Known);
8609 
8610   if (!(Known.Zero | 1).isAllOnesValue())
8611     return false;
8612 
8613   return true;
8614 }
8615 
/// Combine a ZERO_EXTEND node: constant-fold it, merge it with adjacent
/// extends/truncates, turn it into masking, fold it into loads as a zextload,
/// or push it through setcc/shift patterns.
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Fold an extend of a constant (or build_vector of constants).
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  KnownBits Known;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
    // Compute which bits of Op the truncate discarded; the fold is only valid
    // if those bits are known zero (checked against Known.Zero below).
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
      // We may safely transfer the debug info describing the truncate node over
      // to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    // Zero-extend the mask constant to the destination width.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        // Skip the transform if the 'and' already matches as a zextload: the
        // other users would still need the narrow value.
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        // Zero-extend the logic-op constant to the destination width.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        // Capture use information before CombineTo(N, And) rewrites N's uses.
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      // Nothing to widen if the setcc already produces the desired type.
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the sext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
8866 
/// Combine an ISD::ANY_EXTEND node. This mirrors the sign/zero-extend
/// visitors but can be more permissive because the extended high bits of an
/// any_extend are unspecified.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (aext c1) -> c1' : extension of a constant folds to a constant.
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  // An inner extend already produces well-defined (or don't-care) high bits,
  // so the inner opcode can simply be reissued at the wider type.
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x)) -- the truncate and the extend (partially)
  // cancel; getAnyExtOrTrunc emits whatever residual op the types require.
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    // Zero-extend the mask to the wide type; the widened AND then clears the
    // same bits the original trunc+and combination did.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has additional users, only transform when those users
    // (setcc nodes) can be rewritten to use the extended value too.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
                                        TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      } else {
        // Other users still need the narrow value: feed them a truncate of
        // the new extending load.
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                    N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  // i.e. reissue the existing extending load directly at the wider type.
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      // Nothing to do when the setcc already produces its natural result
      // type for these operands.
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                        N0.getOperand(1),
                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}
9012 
9013 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
9014   unsigned Opcode = N->getOpcode();
9015   SDValue N0 = N->getOperand(0);
9016   SDValue N1 = N->getOperand(1);
9017   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
9018 
9019   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9020   if (N0.getOpcode() == Opcode &&
9021       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
9022     return N0;
9023 
9024   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9025       N0.getOperand(0).getOpcode() == Opcode) {
9026     // We have an assert, truncate, assert sandwich. Make one stronger assert
9027     // by asserting on the smallest asserted type to the larger source type.
9028     // This eliminates the later assert:
9029     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9030     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9031     SDValue BigA = N0.getOperand(0);
9032     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9033     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9034            "Asserting zero/sign-extended bits to a type larger than the "
9035            "truncated destination does not provide information");
9036 
9037     SDLoc DL(N);
9038     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
9039     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
9040     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9041                                     BigA.getOperand(0), MinAssertVTVal);
9042     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9043   }
9044 
9045   return SDValue();
9046 }
9047 
/// If the result of a wider load is shifted to right of N bits and then
/// truncated to a narrower type and where N is a multiple of number of bits of
/// the narrower type, transform it to a narrower load from address + N / num of
/// bits of new type. Also narrow the load if the result is masked with an AND
/// to effectively produce a smaller type. If the result is to be extended, also
/// fold the extension to form a extending load.
///
/// \p N is the narrowing node: a TRUNCATE, SIGN_EXTEND_INREG, SRL, AND, or an
/// extend-of-truncate handled by the callers. Returns the narrowed value, or
/// an empty SDValue if no transformation applies.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // ExtVT is the (narrow) memory type we will load; refined per opcode below.
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  unsigned ShAmt = 0;
  bool HasShiftedOffset = false;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it maybe shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    // Treat the SRL itself as N0 so the generic SRL handling below (which
    // matches N0 being an SRL over a load) picks it up.
    N0 = SDValue(N, 0);

    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01 || !LN0)
      return SDValue();

    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    // Pick the widest type whose bits are all still defined after the shift;
    // for sextloads fall back to the shifted result width.
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      // Low-bits mask (e.g. 0x00FF): load just the masked bits.
      ActiveBits = Mask.countTrailingOnes();
    } else if (Mask.isShiftedMask()) {
      // Shifted mask (e.g. 0x0FF0): load from an offset; remember to fix up
      // the mask and re-shift the result at the end (HasShiftedOffset).
      ShAmt = Mask.countTrailingZeros();
      APInt ShiftedMask = Mask.lshr(ShAmt);
      ActiveBits = ShiftedMask.countTrailingOnes();
      HasShiftedOffset = true;
    } else
      return SDValue();

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  // Handle (trunc/ext (srl (load x), c)): the shift selects a sub-word of the
  // loaded value, which becomes a byte offset into the load.
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    SDValue SRL = N0;
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
      ShAmt = ConstShift->getZExtValue();
      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      auto *LN0 = cast<LoadSDNode>(N0);

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
        return SDValue();

      // If the SRL is only used by a masking AND, we may be able to adjust
      // the ExtVT to make the AND redundant.
      SDNode *Mask = *(SRL->use_begin());
      if (Mask->getOpcode() == ISD::AND &&
          isa<ConstantSDNode>(Mask->getOperand(1))) {
        const APInt &ShiftMask =
          cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
        if (ShiftMask.isMask()) {
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                           ShiftMask.countTrailingOnes());
          // If the mask is smaller, recompute the type.
          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
            ExtVT = MaskedVT;
        }
      }
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // On big-endian targets the sub-word lives at the opposite end of the
  // in-memory value, so mirror the bit offset within the stored width.
  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian())
    ShAmt = AdjustBigEndianShift(ShAmt);

  // Build the new, offset pointer: base + (bit shift / 8) bytes.
  EVT PtrType = N0.getOperand(1).getValueType();
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               Flags);
  AddToWorklist(NewPtr.getNode());

  // Emit the narrower load, extending if the caller's opcode required it.
  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  if (HasShiftedOffset) {
    // Recalculate the shift amount after it has been altered to calculate
    // the offset.
    if (DAG.getDataLayout().isBigEndian())
      ShAmt = AdjustBigEndianShift(ShAmt);

    // We're using a shifted mask, so the load now has an offset. This means we
    // now need to shift right the mask to match the new load and then shift
    // right the result of the AND.
    const APInt &Mask = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
    APInt ShiftedMask = Mask.lshr(ShAmt);
    DAG.UpdateNodeOperands(N, Result, DAG.getConstant(ShiftedMask, DL, VT));
    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
    // Wrap the updated AND (node N) in an SHL that re-positions the bits, and
    // redirect all of N's users to the shifted value.
    SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, SDValue(N, 0),
                                  ShiftC);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shifted);
    // RAUW above also rewired the SHL's own operand; point it back at N.
    DAG.UpdateNodeOperands(Shifted.getNode(), SDValue(N, 0), ShiftC);
  }
  // Return the new loaded value.
  return Result;
}
9260 
/// Combine an ISD::SIGN_EXTEND_INREG node: (sext_in_reg x, vt) sign-extends
/// the low vt bits of x in place, leaving the value in the wider type VT.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: this local shadows the EVT type name; it is the asserted (narrow)
  // value type carried by operand 1.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned EVTBits = EVT.getScalarSizeInBits();

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  // The narrower inner extension makes the outer one redundant.
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
      return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
  }

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by target, we can only do the combine when
  // load has one use. Doing otherwise can block folding the extload with other
  // extends that the target does support.
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
        N0.hasOneUse()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  // i.e. try to recognize a 16-bit byte-swap feeding the sign extension.
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  return SDValue();
}
9390 
9391 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
9392   SDValue N0 = N->getOperand(0);
9393   EVT VT = N->getValueType(0);
9394 
9395   if (N0.isUndef())
9396     return DAG.getUNDEF(VT);
9397 
9398   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
9399                                               LegalOperations))
9400     return SDValue(Res, 0);
9401 
9402   return SDValue();
9403 }
9404 
9405 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
9406   SDValue N0 = N->getOperand(0);
9407   EVT VT = N->getValueType(0);
9408 
9409   if (N0.isUndef())
9410     return DAG.getUNDEF(VT);
9411 
9412   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
9413                                               LegalOperations))
9414     return SDValue(Res, 0);
9415 
9416   return SDValue();
9417 }
9418 
9419 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
9420   SDValue N0 = N->getOperand(0);
9421   EVT VT = N->getValueType(0);
9422   bool isLE = DAG.getDataLayout().isLittleEndian();
9423 
9424   // noop truncate
9425   if (N0.getValueType() == N->getValueType(0))
9426     return N0;
9427 
9428   // fold (truncate (truncate x)) -> (truncate x)
9429   if (N0.getOpcode() == ISD::TRUNCATE)
9430     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9431 
9432   // fold (truncate c1) -> c1
9433   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
9434     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
9435     if (C.getNode() != N)
9436       return C;
9437   }
9438 
9439   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
9440   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
9441       N0.getOpcode() == ISD::SIGN_EXTEND ||
9442       N0.getOpcode() == ISD::ANY_EXTEND) {
9443     // if the source is smaller than the dest, we still need an extend.
9444     if (N0.getOperand(0).getValueType().bitsLT(VT))
9445       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9446     // if the source is larger than the dest, than we just need the truncate.
9447     if (N0.getOperand(0).getValueType().bitsGT(VT))
9448       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9449     // if the source and dest are the same type, we can drop both the extend
9450     // and the truncate.
9451     return N0.getOperand(0);
9452   }
9453 
9454   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
9455   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
9456     return SDValue();
9457 
9458   // Fold extract-and-trunc into a narrow extract. For example:
9459   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
9460   //   i32 y = TRUNCATE(i64 x)
9461   //        -- becomes --
9462   //   v16i8 b = BITCAST (v2i64 val)
9463   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
9464   //
9465   // Note: We only run this optimization after type legalization (which often
9466   // creates this pattern) and before operation legalization after which
9467   // we need to be more careful about the vector instructions that we generate.
9468   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9469       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
9470     EVT VecTy = N0.getOperand(0).getValueType();
9471     EVT ExTy = N0.getValueType();
9472     EVT TrTy = N->getValueType(0);
9473 
9474     unsigned NumElem = VecTy.getVectorNumElements();
9475     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
9476 
9477     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
9478     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
9479 
9480     SDValue EltNo = N0->getOperand(1);
9481     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
9482       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
9483       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
9484       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
9485 
9486       SDLoc DL(N);
9487       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
9488                          DAG.getBitcast(NVT, N0.getOperand(0)),
9489                          DAG.getConstant(Index, DL, IndexTy));
9490     }
9491   }
9492 
9493   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
9494   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
9495     EVT SrcVT = N0.getValueType();
9496     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
9497         TLI.isTruncateFree(SrcVT, VT)) {
9498       SDLoc SL(N0);
9499       SDValue Cond = N0.getOperand(0);
9500       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9501       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
9502       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
9503     }
9504   }
9505 
9506   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
9507   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9508       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
9509       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
9510     SDValue Amt = N0.getOperand(1);
9511     KnownBits Known;
9512     DAG.computeKnownBits(Amt, Known);
9513     unsigned Size = VT.getScalarSizeInBits();
9514     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
9515       SDLoc SL(N);
9516       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
9517 
9518       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9519       if (AmtVT != Amt.getValueType()) {
9520         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
9521         AddToWorklist(Amt.getNode());
9522       }
9523       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
9524     }
9525   }
9526 
9527   // Fold a series of buildvector, bitcast, and truncate if possible.
9528   // For example fold
9529   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
9530   //   (2xi32 (buildvector x, y)).
9531   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
9532       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
9533       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
9534       N0.getOperand(0).hasOneUse()) {
9535     SDValue BuildVect = N0.getOperand(0);
9536     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
9537     EVT TruncVecEltTy = VT.getVectorElementType();
9538 
9539     // Check that the element types match.
9540     if (BuildVectEltTy == TruncVecEltTy) {
9541       // Now we only need to compute the offset of the truncated elements.
9542       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
9543       unsigned TruncVecNumElts = VT.getVectorNumElements();
9544       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
9545 
9546       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
9547              "Invalid number of elements");
9548 
9549       SmallVector<SDValue, 8> Opnds;
9550       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
9551         Opnds.push_back(BuildVect.getOperand(i));
9552 
9553       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
9554     }
9555   }
9556 
9557   // See if we can simplify the input to this truncate through knowledge that
9558   // only the low bits are being used.
9559   // For example "trunc (or (shl x, 8), y)" // -> trunc y
9560   // Currently we only perform this optimization on scalars because vectors
9561   // may have different active low bits.
9562   if (!VT.isVector()) {
9563     APInt Mask =
9564         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
9565     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
9566       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
9567   }
9568 
9569   // fold (truncate (load x)) -> (smaller load x)
9570   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
9571   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
9572     if (SDValue Reduced = ReduceLoadWidth(N))
9573       return Reduced;
9574 
9575     // Handle the case where the load remains an extending load even
9576     // after truncation.
9577     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
9578       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9579       if (!LN0->isVolatile() &&
9580           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
9581         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
9582                                          VT, LN0->getChain(), LN0->getBasePtr(),
9583                                          LN0->getMemoryVT(),
9584                                          LN0->getMemOperand());
9585         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
9586         return NewLoad;
9587       }
9588     }
9589   }
9590 
9591   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
9592   // where ... are all 'undef'.
9593   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
9594     SmallVector<EVT, 8> VTs;
9595     SDValue V;
9596     unsigned Idx = 0;
9597     unsigned NumDefs = 0;
9598 
9599     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
9600       SDValue X = N0.getOperand(i);
9601       if (!X.isUndef()) {
9602         V = X;
9603         Idx = i;
9604         NumDefs++;
9605       }
9606       // Stop if more than one members are non-undef.
9607       if (NumDefs > 1)
9608         break;
9609       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
9610                                      VT.getVectorElementType(),
9611                                      X.getValueType().getVectorNumElements()));
9612     }
9613 
9614     if (NumDefs == 0)
9615       return DAG.getUNDEF(VT);
9616 
9617     if (NumDefs == 1) {
9618       assert(V.getNode() && "The single defined operand is empty!");
9619       SmallVector<SDValue, 8> Opnds;
9620       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
9621         if (i != Idx) {
9622           Opnds.push_back(DAG.getUNDEF(VTs[i]));
9623           continue;
9624         }
9625         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
9626         AddToWorklist(NV.getNode());
9627         Opnds.push_back(NV);
9628       }
9629       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
9630     }
9631   }
9632 
9633   // Fold truncate of a bitcast of a vector to an extract of the low vector
9634   // element.
9635   //
9636   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
9637   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
9638     SDValue VecSrc = N0.getOperand(0);
9639     EVT SrcVT = VecSrc.getValueType();
9640     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
9641         (!LegalOperations ||
9642          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
9643       SDLoc SL(N);
9644 
9645       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
9646       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
9647       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
9648                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
9649     }
9650   }
9651 
9652   // Simplify the operands using demanded-bits information.
9653   if (!VT.isVector() &&
9654       SimplifyDemandedBits(SDValue(N, 0)))
9655     return SDValue(N, 0);
9656 
9657   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
9658   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
9659   // When the adde's carry is not used.
9660   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
9661       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
9662       (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
9663     SDLoc SL(N);
9664     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9665     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9666     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
9667     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
9668   }
9669 
9670   // fold (truncate (extract_subvector(ext x))) ->
9671   //      (extract_subvector x)
9672   // TODO: This can be generalized to cover cases where the truncate and extract
9673   // do not fully cancel each other out.
9674   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9675     SDValue N00 = N0.getOperand(0);
9676     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
9677         N00.getOpcode() == ISD::ZERO_EXTEND ||
9678         N00.getOpcode() == ISD::ANY_EXTEND) {
9679       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
9680           VT.getVectorElementType())
9681         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
9682                            N00.getOperand(0), N0.getOperand(1));
9683     }
9684   }
9685 
9686   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9687     return NewVSel;
9688 
9689   return SDValue();
9690 }
9691 
9692 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
9693   SDValue Elt = N->getOperand(i);
9694   if (Elt.getOpcode() != ISD::MERGE_VALUES)
9695     return Elt.getNode();
9696   return Elt.getOperand(Elt.getResNo()).getNode();
9697 }
9698 
/// build_pair (load, load) -> load
/// if load locations are consecutive.
///
/// Returns a single wider load of the BUILD_PAIR's result type, or an empty
/// SDValue if the two halves cannot be merged.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  // Look through MERGE_VALUES wrappers to the underlying loads, if any.
  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR is always having the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load, we
  // need to consider the endianness.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  // Both halves must be plain (non-extending) loads in the same address
  // space, and the low-part load must have no other users.
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  // The high-part load must sit exactly one LD1-sized element past LD1 in
  // memory, and neither load may be volatile.
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    // Only merge when the wide load needs no more alignment than the original
    // provides, and a LOAD of VT is legal (or we are before legalization).
    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Align);
  }

  return SDValue();
}
9732 
9733 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
9734   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
9735   // and Lo parts; on big-endian machines it doesn't.
9736   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
9737 }
9738 
9739 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
9740                                     const TargetLowering &TLI) {
9741   // If this is not a bitcast to an FP type or if the target doesn't have
9742   // IEEE754-compliant FP logic, we're done.
9743   EVT VT = N->getValueType(0);
9744   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
9745     return SDValue();
9746 
9747   // TODO: Use splat values for the constant-checking below and remove this
9748   // restriction.
9749   SDValue N0 = N->getOperand(0);
9750   EVT SourceVT = N0.getValueType();
9751   if (SourceVT.isVector())
9752     return SDValue();
9753 
9754   unsigned FPOpcode;
9755   APInt SignMask;
9756   switch (N0.getOpcode()) {
9757   case ISD::AND:
9758     FPOpcode = ISD::FABS;
9759     SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
9760     break;
9761   case ISD::XOR:
9762     FPOpcode = ISD::FNEG;
9763     SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
9764     break;
9765   // TODO: ISD::OR --> ISD::FNABS?
9766   default:
9767     return SDValue();
9768   }
9769 
9770   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
9771   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
9772   SDValue LogicOp0 = N0.getOperand(0);
9773   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9774   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
9775       LogicOp0.getOpcode() == ISD::BITCAST &&
9776       LogicOp0->getOperand(0).getValueType() == VT)
9777     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
9778 
9779   return SDValue();
9780 }
9781 
/// Combine a BITCAST node: constant-fold casts of constant build_vectors and
/// scalar constants, collapse chained bitcasts, move a bitcast across a load,
/// turn bitcast(fneg/fabs/fcopysign) into integer bit operations (including
/// the ppc_fp128 special cases), merge build_pairs of consecutive loads, and
/// strip redundant double-bitcasts around vector shuffles.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Bitcast of undef is undef of the result type.
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  // We always need to check that this is just a fp -> int or int -> fp
  // conversion, otherwise we will get back N which will confuse the caller
  // into thinking we used CombineTo. This can block target combines from
  // running. If we aren't allowed illegal operations, we need to ensure the
  // resulting operation will be legal.
  // TODO: Maybe we should check that the return value isn't N explicitly?
  if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
       (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
      (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
       (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
    return DAG.getBitcast(VT, N0);

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned OrigAlign = LN0->getAlignment();

    // Only fold if the target says a VT-typed access at the original
    // alignment is both allowed and fast.
    bool Fast = false;
    if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
        Fast) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), OrigAlign,
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Redirect users of the old load's chain to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  // (bitcast (and/xor (bitcast x), signmask-const)) -> fabs/fneg x.
  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      // ppc_fp128 is a pair of doubles; only the Hi double's sign bit
      // matters, so build a 64-bit flip mask and pair it with itself.
      assert(VT.getSizeInBits() == 128);
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        // fneg always flips the sign bit.
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        // fabs flips the sign bit only when it is currently set.
        assert(N0.getOpcode() == ISD::FABS);
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    // Ordinary scalar FP: fneg is xor with the sign mask, fabs is and with
    // its complement.
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        // ppc_fp128 case: flip the constant's sign bit only where it differs
        // from the sign-source x, computed via xor of the Hi halves.
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      // Ordinary scalar case: take the sign bit from x and all other bits
      // from the constant, then or them together.
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    // Returns an empty SDValue when neither case applies.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    // FIXME: If either input vector is bitcast, try to convert the shuffle to
    // the result type of this bitcast. This would eliminate at least one
    // bitcast. See the transform in InstCombine.
    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    // Widen the shuffle mask: each source element becomes MaskScale
    // consecutive elements in the result type.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    // If the widened mask is not legal, try its commuted form.
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}
10032 
10033 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
10034   EVT VT = N->getValueType(0);
10035   return CombineConsecutiveLoads(N, VT);
10036 }
10037 
10038 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
10039 /// operands. DstEltVT indicates the destination element value type.
10040 SDValue DAGCombiner::
10041 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
10042   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
10043 
10044   // If this is already the right type, we're done.
10045   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
10046 
10047   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
10048   unsigned DstBitSize = DstEltVT.getSizeInBits();
10049 
10050   // If this is a conversion of N elements of one type to N elements of another
10051   // type, convert each element.  This handles FP<->INT cases.
10052   if (SrcBitSize == DstBitSize) {
10053     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10054                               BV->getValueType(0).getVectorNumElements());
10055 
10056     // Due to the FP element handling below calling this routine recursively,
10057     // we can end up with a scalar-to-vector node here.
10058     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
10059       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
10060                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
10061 
10062     SmallVector<SDValue, 8> Ops;
10063     for (SDValue Op : BV->op_values()) {
10064       // If the vector element type is not legal, the BUILD_VECTOR operands
10065       // are promoted and implicitly truncated.  Make that explicit here.
10066       if (Op.getValueType() != SrcEltVT)
10067         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
10068       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
10069       AddToWorklist(Ops.back().getNode());
10070     }
10071     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
10072   }
10073 
10074   // Otherwise, we're growing or shrinking the elements.  To avoid having to
10075   // handle annoying details of growing/shrinking FP values, we convert them to
10076   // int first.
10077   if (SrcEltVT.isFloatingPoint()) {
10078     // Convert the input float vector to a int vector where the elements are the
10079     // same sizes.
10080     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
10081     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
10082     SrcEltVT = IntVT;
10083   }
10084 
10085   // Now we know the input is an integer vector.  If the output is a FP type,
10086   // convert to integer first, then to FP of the right size.
10087   if (DstEltVT.isFloatingPoint()) {
10088     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
10089     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
10090 
10091     // Next, convert to FP elements of the same size.
10092     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
10093   }
10094 
10095   SDLoc DL(BV);
10096 
10097   // Okay, we know the src/dst types are both integers of differing types.
10098   // Handling growing first.
10099   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
10100   if (SrcBitSize < DstBitSize) {
10101     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
10102 
10103     SmallVector<SDValue, 8> Ops;
10104     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
10105          i += NumInputsPerOutput) {
10106       bool isLE = DAG.getDataLayout().isLittleEndian();
10107       APInt NewBits = APInt(DstBitSize, 0);
10108       bool EltIsUndef = true;
10109       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
10110         // Shift the previously computed bits over.
10111         NewBits <<= SrcBitSize;
10112         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
10113         if (Op.isUndef()) continue;
10114         EltIsUndef = false;
10115 
10116         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
10117                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
10118       }
10119 
10120       if (EltIsUndef)
10121         Ops.push_back(DAG.getUNDEF(DstEltVT));
10122       else
10123         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
10124     }
10125 
10126     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
10127     return DAG.getBuildVector(VT, DL, Ops);
10128   }
10129 
10130   // Finally, this must be the case where we are shrinking elements: each input
10131   // turns into multiple outputs.
10132   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
10133   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10134                             NumOutputsPerInput*BV->getNumOperands());
10135   SmallVector<SDValue, 8> Ops;
10136 
10137   for (const SDValue &Op : BV->op_values()) {
10138     if (Op.isUndef()) {
10139       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
10140       continue;
10141     }
10142 
10143     APInt OpVal = cast<ConstantSDNode>(Op)->
10144                   getAPIntValue().zextOrTrunc(SrcBitSize);
10145 
10146     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
10147       APInt ThisVal = OpVal.trunc(DstBitSize);
10148       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
10149       OpVal.lshrInPlace(DstBitSize);
10150     }
10151 
10152     // For big endian targets, swap the order of the pieces of each element.
10153     if (DAG.getDataLayout().isBigEndian())
10154       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
10155   }
10156 
10157   return DAG.getBuildVector(VT, DL, Ops);
10158 }
10159 
10160 static bool isContractable(SDNode *N) {
10161   SDNodeFlags F = N->getFlags();
10162   return F.hasAllowContract() || F.hasAllowReassociation();
10163 }
10164 
10165 /// Try to perform FMA combining on a given FADD node.
10166 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
10167   SDValue N0 = N->getOperand(0);
10168   SDValue N1 = N->getOperand(1);
10169   EVT VT = N->getValueType(0);
10170   SDLoc SL(N);
10171 
10172   const TargetOptions &Options = DAG.getTarget().Options;
10173 
10174   // Floating-point multiply-add with intermediate rounding.
10175   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10176 
10177   // Floating-point multiply-add without intermediate rounding.
10178   bool HasFMA =
10179       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10180       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10181 
10182   // No valid opcode, do not combine.
10183   if (!HasFMAD && !HasFMA)
10184     return SDValue();
10185 
10186   SDNodeFlags Flags = N->getFlags();
10187   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10188   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10189                               CanFuse || HasFMAD);
10190   // If the addition is not contractable, do not combine.
10191   if (!AllowFusionGlobally && !isContractable(N))
10192     return SDValue();
10193 
10194   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10195   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10196     return SDValue();
10197 
10198   // Always prefer FMAD to FMA for precision.
10199   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10200   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10201 
10202   // Is the node an FMUL and contractable either due to global flags or
10203   // SDNodeFlags.
10204   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10205     if (N.getOpcode() != ISD::FMUL)
10206       return false;
10207     return AllowFusionGlobally || isContractable(N.getNode());
10208   };
10209   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
10210   // prefer to fold the multiply with fewer uses.
10211   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
10212     if (N0.getNode()->use_size() > N1.getNode()->use_size())
10213       std::swap(N0, N1);
10214   }
10215 
10216   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
10217   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10218     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10219                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
10220   }
10221 
10222   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
10223   // Note: Commutes FADD operands.
10224   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10225     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10226                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
10227   }
10228 
10229   // Look through FP_EXTEND nodes to do more combining.
10230 
10231   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
10232   if (N0.getOpcode() == ISD::FP_EXTEND) {
10233     SDValue N00 = N0.getOperand(0);
10234     if (isContractableFMUL(N00) &&
10235         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10236       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10237                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10238                                      N00.getOperand(0)),
10239                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10240                                      N00.getOperand(1)), N1, Flags);
10241     }
10242   }
10243 
10244   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
10245   // Note: Commutes FADD operands.
10246   if (N1.getOpcode() == ISD::FP_EXTEND) {
10247     SDValue N10 = N1.getOperand(0);
10248     if (isContractableFMUL(N10) &&
10249         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10250       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10251                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10252                                      N10.getOperand(0)),
10253                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10254                                      N10.getOperand(1)), N0, Flags);
10255     }
10256   }
10257 
10258   // More folding opportunities when target permits.
10259   if (Aggressive) {
10260     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
10261     if (CanFuse &&
10262         N0.getOpcode() == PreferredFusedOpcode &&
10263         N0.getOperand(2).getOpcode() == ISD::FMUL &&
10264         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
10265       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10266                          N0.getOperand(0), N0.getOperand(1),
10267                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10268                                      N0.getOperand(2).getOperand(0),
10269                                      N0.getOperand(2).getOperand(1),
10270                                      N1, Flags), Flags);
10271     }
10272 
10273     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
10274     if (CanFuse &&
10275         N1->getOpcode() == PreferredFusedOpcode &&
10276         N1.getOperand(2).getOpcode() == ISD::FMUL &&
10277         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
10278       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10279                          N1.getOperand(0), N1.getOperand(1),
10280                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10281                                      N1.getOperand(2).getOperand(0),
10282                                      N1.getOperand(2).getOperand(1),
10283                                      N0, Flags), Flags);
10284     }
10285 
10286 
10287     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
10288     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
10289     auto FoldFAddFMAFPExtFMul = [&] (
10290       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10291       SDNodeFlags Flags) {
10292       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
10293                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10294                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10295                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10296                                      Z, Flags), Flags);
10297     };
10298     if (N0.getOpcode() == PreferredFusedOpcode) {
10299       SDValue N02 = N0.getOperand(2);
10300       if (N02.getOpcode() == ISD::FP_EXTEND) {
10301         SDValue N020 = N02.getOperand(0);
10302         if (isContractableFMUL(N020) &&
10303             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10304           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
10305                                       N020.getOperand(0), N020.getOperand(1),
10306                                       N1, Flags);
10307         }
10308       }
10309     }
10310 
10311     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
10312     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
10313     // FIXME: This turns two single-precision and one double-precision
10314     // operation into two double-precision operations, which might not be
10315     // interesting for all targets, especially GPUs.
10316     auto FoldFAddFPExtFMAFMul = [&] (
10317       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10318       SDNodeFlags Flags) {
10319       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10320                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
10321                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
10322                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10323                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10324                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10325                                      Z, Flags), Flags);
10326     };
10327     if (N0.getOpcode() == ISD::FP_EXTEND) {
10328       SDValue N00 = N0.getOperand(0);
10329       if (N00.getOpcode() == PreferredFusedOpcode) {
10330         SDValue N002 = N00.getOperand(2);
10331         if (isContractableFMUL(N002) &&
10332             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10333           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
10334                                       N002.getOperand(0), N002.getOperand(1),
10335                                       N1, Flags);
10336         }
10337       }
10338     }
10339 
10340     // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
10341     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
10342     if (N1.getOpcode() == PreferredFusedOpcode) {
10343       SDValue N12 = N1.getOperand(2);
10344       if (N12.getOpcode() == ISD::FP_EXTEND) {
10345         SDValue N120 = N12.getOperand(0);
10346         if (isContractableFMUL(N120) &&
10347             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10348           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
10349                                       N120.getOperand(0), N120.getOperand(1),
10350                                       N0, Flags);
10351         }
10352       }
10353     }
10354 
10355     // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
10356     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
10357     // FIXME: This turns two single-precision and one double-precision
10358     // operation into two double-precision operations, which might not be
10359     // interesting for all targets, especially GPUs.
10360     if (N1.getOpcode() == ISD::FP_EXTEND) {
10361       SDValue N10 = N1.getOperand(0);
10362       if (N10.getOpcode() == PreferredFusedOpcode) {
10363         SDValue N102 = N10.getOperand(2);
10364         if (isContractableFMUL(N102) &&
10365             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10366           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
10367                                       N102.getOperand(0), N102.getOperand(1),
10368                                       N0, Flags);
10369         }
10370       }
10371     }
10372   }
10373 
10374   return SDValue();
10375 }
10376 
10377 /// Try to perform FMA combining on a given FSUB node.
10378 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
10379   SDValue N0 = N->getOperand(0);
10380   SDValue N1 = N->getOperand(1);
10381   EVT VT = N->getValueType(0);
10382   SDLoc SL(N);
10383 
10384   const TargetOptions &Options = DAG.getTarget().Options;
10385   // Floating-point multiply-add with intermediate rounding.
10386   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10387 
10388   // Floating-point multiply-add without intermediate rounding.
10389   bool HasFMA =
10390       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10391       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10392 
10393   // No valid opcode, do not combine.
10394   if (!HasFMAD && !HasFMA)
10395     return SDValue();
10396 
10397   const SDNodeFlags Flags = N->getFlags();
10398   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10399   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10400                               CanFuse || HasFMAD);
10401 
10402   // If the subtraction is not contractable, do not combine.
10403   if (!AllowFusionGlobally && !isContractable(N))
10404     return SDValue();
10405 
10406   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10407   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10408     return SDValue();
10409 
10410   // Always prefer FMAD to FMA for precision.
10411   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10412   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10413 
10414   // Is the node an FMUL and contractable either due to global flags or
10415   // SDNodeFlags.
10416   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10417     if (N.getOpcode() != ISD::FMUL)
10418       return false;
10419     return AllowFusionGlobally || isContractable(N.getNode());
10420   };
10421 
10422   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
10423   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10424     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10425                        N0.getOperand(0), N0.getOperand(1),
10426                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10427   }
10428 
10429   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
10430   // Note: Commutes FSUB operands.
10431   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10432     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10433                        DAG.getNode(ISD::FNEG, SL, VT,
10434                                    N1.getOperand(0)),
10435                        N1.getOperand(1), N0, Flags);
10436   }
10437 
10438   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
10439   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
10440       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
10441     SDValue N00 = N0.getOperand(0).getOperand(0);
10442     SDValue N01 = N0.getOperand(0).getOperand(1);
10443     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10444                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
10445                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10446   }
10447 
10448   // Look through FP_EXTEND nodes to do more combining.
10449 
10450   // fold (fsub (fpext (fmul x, y)), z)
10451   //   -> (fma (fpext x), (fpext y), (fneg z))
10452   if (N0.getOpcode() == ISD::FP_EXTEND) {
10453     SDValue N00 = N0.getOperand(0);
10454     if (isContractableFMUL(N00) &&
10455         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10456       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10457                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10458                                      N00.getOperand(0)),
10459                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10460                                      N00.getOperand(1)),
10461                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10462     }
10463   }
10464 
10465   // fold (fsub x, (fpext (fmul y, z)))
10466   //   -> (fma (fneg (fpext y)), (fpext z), x)
10467   // Note: Commutes FSUB operands.
10468   if (N1.getOpcode() == ISD::FP_EXTEND) {
10469     SDValue N10 = N1.getOperand(0);
10470     if (isContractableFMUL(N10) &&
10471         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10472       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10473                          DAG.getNode(ISD::FNEG, SL, VT,
10474                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
10475                                                  N10.getOperand(0))),
10476                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10477                                      N10.getOperand(1)),
10478                          N0, Flags);
10479     }
10480   }
10481 
10482   // fold (fsub (fpext (fneg (fmul, x, y))), z)
10483   //   -> (fneg (fma (fpext x), (fpext y), z))
10484   // Note: This could be removed with appropriate canonicalization of the
10485   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10486   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10487   // from implementing the canonicalization in visitFSUB.
10488   if (N0.getOpcode() == ISD::FP_EXTEND) {
10489     SDValue N00 = N0.getOperand(0);
10490     if (N00.getOpcode() == ISD::FNEG) {
10491       SDValue N000 = N00.getOperand(0);
10492       if (isContractableFMUL(N000) &&
10493           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10494         return DAG.getNode(ISD::FNEG, SL, VT,
10495                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10496                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10497                                                    N000.getOperand(0)),
10498                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10499                                                    N000.getOperand(1)),
10500                                        N1, Flags));
10501       }
10502     }
10503   }
10504 
10505   // fold (fsub (fneg (fpext (fmul, x, y))), z)
10506   //   -> (fneg (fma (fpext x)), (fpext y), z)
10507   // Note: This could be removed with appropriate canonicalization of the
10508   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10509   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10510   // from implementing the canonicalization in visitFSUB.
10511   if (N0.getOpcode() == ISD::FNEG) {
10512     SDValue N00 = N0.getOperand(0);
10513     if (N00.getOpcode() == ISD::FP_EXTEND) {
10514       SDValue N000 = N00.getOperand(0);
10515       if (isContractableFMUL(N000) &&
10516           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
10517         return DAG.getNode(ISD::FNEG, SL, VT,
10518                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10519                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10520                                                    N000.getOperand(0)),
10521                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10522                                                    N000.getOperand(1)),
10523                                        N1, Flags));
10524       }
10525     }
10526   }
10527 
10528   // More folding opportunities when target permits.
10529   if (Aggressive) {
10530     // fold (fsub (fma x, y, (fmul u, v)), z)
10531     //   -> (fma x, y (fma u, v, (fneg z)))
10532     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
10533         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
10534         N0.getOperand(2)->hasOneUse()) {
10535       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10536                          N0.getOperand(0), N0.getOperand(1),
10537                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10538                                      N0.getOperand(2).getOperand(0),
10539                                      N0.getOperand(2).getOperand(1),
10540                                      DAG.getNode(ISD::FNEG, SL, VT,
10541                                                  N1), Flags), Flags);
10542     }
10543 
10544     // fold (fsub x, (fma y, z, (fmul u, v)))
10545     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
10546     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
10547         isContractableFMUL(N1.getOperand(2))) {
10548       SDValue N20 = N1.getOperand(2).getOperand(0);
10549       SDValue N21 = N1.getOperand(2).getOperand(1);
10550       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10551                          DAG.getNode(ISD::FNEG, SL, VT,
10552                                      N1.getOperand(0)),
10553                          N1.getOperand(1),
10554                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10555                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
10556                                      N21, N0, Flags), Flags);
10557     }
10558 
10559 
10560     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
10561     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
10562     if (N0.getOpcode() == PreferredFusedOpcode) {
10563       SDValue N02 = N0.getOperand(2);
10564       if (N02.getOpcode() == ISD::FP_EXTEND) {
10565         SDValue N020 = N02.getOperand(0);
10566         if (isContractableFMUL(N020) &&
10567             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10568           return DAG.getNode(PreferredFusedOpcode, SL, VT,
10569                              N0.getOperand(0), N0.getOperand(1),
10570                              DAG.getNode(PreferredFusedOpcode, SL, VT,
10571                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10572                                                      N020.getOperand(0)),
10573                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10574                                                      N020.getOperand(1)),
10575                                          DAG.getNode(ISD::FNEG, SL, VT,
10576                                                      N1), Flags), Flags);
10577         }
10578       }
10579     }
10580 
10581     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
10582     //   -> (fma (fpext x), (fpext y),
10583     //           (fma (fpext u), (fpext v), (fneg z)))
10584     // FIXME: This turns two single-precision and one double-precision
10585     // operation into two double-precision operations, which might not be
10586     // interesting for all targets, especially GPUs.
10587     if (N0.getOpcode() == ISD::FP_EXTEND) {
10588       SDValue N00 = N0.getOperand(0);
10589       if (N00.getOpcode() == PreferredFusedOpcode) {
10590         SDValue N002 = N00.getOperand(2);
10591         if (isContractableFMUL(N002) &&
10592             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10593           return DAG.getNode(PreferredFusedOpcode, SL, VT,
10594                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
10595                                          N00.getOperand(0)),
10596                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
10597                                          N00.getOperand(1)),
10598                              DAG.getNode(PreferredFusedOpcode, SL, VT,
10599                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10600                                                      N002.getOperand(0)),
10601                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10602                                                      N002.getOperand(1)),
10603                                          DAG.getNode(ISD::FNEG, SL, VT,
10604                                                      N1), Flags), Flags);
10605         }
10606       }
10607     }
10608 
10609     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
10610     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
10611     if (N1.getOpcode() == PreferredFusedOpcode &&
10612         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
10613       SDValue N120 = N1.getOperand(2).getOperand(0);
10614       if (isContractableFMUL(N120) &&
10615           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10616         SDValue N1200 = N120.getOperand(0);
10617         SDValue N1201 = N120.getOperand(1);
10618         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10619                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
10620                            N1.getOperand(1),
10621                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10622                                        DAG.getNode(ISD::FNEG, SL, VT,
10623                                                    DAG.getNode(ISD::FP_EXTEND, SL,
10624                                                                VT, N1200)),
10625                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10626                                                    N1201),
10627                                        N0, Flags), Flags);
10628       }
10629     }
10630 
10631     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
10632     //   -> (fma (fneg (fpext y)), (fpext z),
10633     //           (fma (fneg (fpext u)), (fpext v), x))
10634     // FIXME: This turns two single-precision and one double-precision
10635     // operation into two double-precision operations, which might not be
10636     // interesting for all targets, especially GPUs.
10637     if (N1.getOpcode() == ISD::FP_EXTEND &&
10638         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
10639       SDValue CvtSrc = N1.getOperand(0);
10640       SDValue N100 = CvtSrc.getOperand(0);
10641       SDValue N101 = CvtSrc.getOperand(1);
10642       SDValue N102 = CvtSrc.getOperand(2);
10643       if (isContractableFMUL(N102) &&
10644           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
10645         SDValue N1020 = N102.getOperand(0);
10646         SDValue N1021 = N102.getOperand(1);
10647         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10648                            DAG.getNode(ISD::FNEG, SL, VT,
10649                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10650                                                    N100)),
10651                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
10652                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10653                                        DAG.getNode(ISD::FNEG, SL, VT,
10654                                                    DAG.getNode(ISD::FP_EXTEND, SL,
10655                                                                VT, N1020)),
10656                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10657                                                    N1021),
10658                                        N0, Flags), Flags);
10659       }
10660     }
10661   }
10662 
10663   return SDValue();
10664 }
10665 
10666 /// Try to perform FMA combining on a given FMUL node based on the distributive
10667 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
10668 /// subtraction instead of addition).
10669 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
10670   SDValue N0 = N->getOperand(0);
10671   SDValue N1 = N->getOperand(1);
10672   EVT VT = N->getValueType(0);
10673   SDLoc SL(N);
10674   const SDNodeFlags Flags = N->getFlags();
10675 
10676   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
10677 
10678   const TargetOptions &Options = DAG.getTarget().Options;
10679 
10680   // The transforms below are incorrect when x == 0 and y == inf, because the
10681   // intermediate multiplication produces a nan.
10682   if (!Options.NoInfsFPMath)
10683     return SDValue();
10684 
10685   // Floating-point multiply-add without intermediate rounding.
10686   bool HasFMA =
10687       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
10688       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10689       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10690 
10691   // Floating-point multiply-add with intermediate rounding. This can result
10692   // in a less precise result due to the changed rounding order.
10693   bool HasFMAD = Options.UnsafeFPMath &&
10694                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10695 
10696   // No valid opcode, do not combine.
10697   if (!HasFMAD && !HasFMA)
10698     return SDValue();
10699 
10700   // Always prefer FMAD to FMA for precision.
10701   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10702   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10703 
10704   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
10705   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
10706   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
10707     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
10708       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
10709       if (XC1 && XC1->isExactlyValue(+1.0))
10710         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10711                            Y, Flags);
10712       if (XC1 && XC1->isExactlyValue(-1.0))
10713         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10714                            DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10715     }
10716     return SDValue();
10717   };
10718 
10719   if (SDValue FMA = FuseFADD(N0, N1, Flags))
10720     return FMA;
10721   if (SDValue FMA = FuseFADD(N1, N0, Flags))
10722     return FMA;
10723 
10724   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
10725   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
10726   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
10727   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
10728   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
10729     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
10730       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
10731       if (XC0 && XC0->isExactlyValue(+1.0))
10732         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10733                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
10734                            Y, Flags);
10735       if (XC0 && XC0->isExactlyValue(-1.0))
10736         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10737                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
10738                            DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10739 
10740       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
10741       if (XC1 && XC1->isExactlyValue(+1.0))
10742         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10743                            DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10744       if (XC1 && XC1->isExactlyValue(-1.0))
10745         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10746                            Y, Flags);
10747     }
10748     return SDValue();
10749   };
10750 
10751   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
10752     return FMA;
10753   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
10754     return FMA;
10755 
10756   return SDValue();
10757 }
10758 
10759 static bool isFMulNegTwo(SDValue &N) {
10760   if (N.getOpcode() != ISD::FMUL)
10761     return false;
10762   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
10763     return CFP->isExactlyValue(-2.0);
10764   return false;
10765 }
10766 
10767 SDValue DAGCombiner::visitFADD(SDNode *N) {
10768   SDValue N0 = N->getOperand(0);
10769   SDValue N1 = N->getOperand(1);
10770   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
10771   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
10772   EVT VT = N->getValueType(0);
10773   SDLoc DL(N);
10774   const TargetOptions &Options = DAG.getTarget().Options;
10775   const SDNodeFlags Flags = N->getFlags();
10776 
10777   // fold vector ops
10778   if (VT.isVector())
10779     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10780       return FoldedVOp;
10781 
10782   // fold (fadd c1, c2) -> c1 + c2
10783   if (N0CFP && N1CFP)
10784     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
10785 
10786   // canonicalize constant to RHS
10787   if (N0CFP && !N1CFP)
10788     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
10789 
10790   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
10791   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
10792   if (N1C && N1C->isZero())
10793     if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
10794       return N0;
10795 
10796   if (SDValue NewSel = foldBinOpIntoSelect(N))
10797     return NewSel;
10798 
10799   // fold (fadd A, (fneg B)) -> (fsub A, B)
10800   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
10801       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
10802     return DAG.getNode(ISD::FSUB, DL, VT, N0,
10803                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
10804 
10805   // fold (fadd (fneg A), B) -> (fsub B, A)
10806   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
10807       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
10808     return DAG.getNode(ISD::FSUB, DL, VT, N1,
10809                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
10810 
10811   // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
10812   // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
10813   if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
10814       (isFMulNegTwo(N1) && N1.hasOneUse())) {
10815     bool N1IsFMul = isFMulNegTwo(N1);
10816     SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
10817     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
10818     return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
10819   }
10820 
10821   // No FP constant should be created after legalization as Instruction
10822   // Selection pass has a hard time dealing with FP constants.
10823   bool AllowNewConst = (Level < AfterLegalizeDAG);
10824 
10825   // If 'unsafe math' or nnan is enabled, fold lots of things.
10826   if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
10827     // If allowed, fold (fadd (fneg x), x) -> 0.0
10828     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
10829       return DAG.getConstantFP(0.0, DL, VT);
10830 
10831     // If allowed, fold (fadd x, (fneg x)) -> 0.0
10832     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
10833       return DAG.getConstantFP(0.0, DL, VT);
10834   }
10835 
10836   // If 'unsafe math' or reassoc and nsz, fold lots of things.
10837   // TODO: break out portions of the transformations below for which Unsafe is
10838   //       considered and which do not require both nsz and reassoc
10839   if ((Options.UnsafeFPMath ||
10840        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
10841       AllowNewConst) {
10842     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
10843     if (N1CFP && N0.getOpcode() == ISD::FADD &&
10844         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
10845       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
10846       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
10847     }
10848 
10849     // We can fold chains of FADD's of the same value into multiplications.
10850     // This transform is not safe in general because we are reducing the number
10851     // of rounding steps.
10852     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
10853       if (N0.getOpcode() == ISD::FMUL) {
10854         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
10855         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
10856 
10857         // (fadd (fmul x, c), x) -> (fmul x, c+1)
10858         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
10859           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
10860                                        DAG.getConstantFP(1.0, DL, VT), Flags);
10861           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
10862         }
10863 
10864         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
10865         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
10866             N1.getOperand(0) == N1.getOperand(1) &&
10867             N0.getOperand(0) == N1.getOperand(0)) {
10868           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
10869                                        DAG.getConstantFP(2.0, DL, VT), Flags);
10870           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
10871         }
10872       }
10873 
10874       if (N1.getOpcode() == ISD::FMUL) {
10875         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
10876         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
10877 
10878         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
10879         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
10880           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
10881                                        DAG.getConstantFP(1.0, DL, VT), Flags);
10882           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
10883         }
10884 
10885         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
10886         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
10887             N0.getOperand(0) == N0.getOperand(1) &&
10888             N1.getOperand(0) == N0.getOperand(0)) {
10889           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
10890                                        DAG.getConstantFP(2.0, DL, VT), Flags);
10891           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
10892         }
10893       }
10894 
10895       if (N0.getOpcode() == ISD::FADD) {
10896         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
10897         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
10898         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
10899             (N0.getOperand(0) == N1)) {
10900           return DAG.getNode(ISD::FMUL, DL, VT,
10901                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
10902         }
10903       }
10904 
10905       if (N1.getOpcode() == ISD::FADD) {
10906         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
10907         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
10908         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
10909             N1.getOperand(0) == N0) {
10910           return DAG.getNode(ISD::FMUL, DL, VT,
10911                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
10912         }
10913       }
10914 
10915       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
10916       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
10917           N0.getOperand(0) == N0.getOperand(1) &&
10918           N1.getOperand(0) == N1.getOperand(1) &&
10919           N0.getOperand(0) == N1.getOperand(0)) {
10920         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
10921                            DAG.getConstantFP(4.0, DL, VT), Flags);
10922       }
10923     }
10924   } // enable-unsafe-fp-math
10925 
10926   // FADD -> FMA combines:
10927   if (SDValue Fused = visitFADDForFMACombine(N)) {
10928     AddToWorklist(Fused.getNode());
10929     return Fused;
10930   }
10931   return SDValue();
10932 }
10933 
/// Try to simplify an FSUB node. Most folds below are gated on either the
/// global UnsafeFPMath option or this node's own fast-math flags.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Per-operand scalar (or splat-vector) FP constants, if present.
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  // Re-emitting the node with two constant operands lets getNode fold it.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // (fsub A, 0) -> A
  // Subtracting +0.0 is always exact. Subtracting -0.0 would turn a -0.0
  // result into +0.0, so that case additionally requires nsz or unsafe math.
  if (N1CFP && N1CFP->isZero()) {
    if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
        Flags.hasNoSignedZeros()) {
      return N0;
    }
  }

  if (N0 == N1) {
    // (fsub x, x) -> 0.0
    // Requires nnan (or unsafe math) because NaN - NaN is NaN, not 0.0.
    if (Options.UnsafeFPMath || Flags.hasNoNaNs())
      return DAG.getConstantFP(0.0f, DL, VT);
  }

  // (fsub 0, B) -> -B
  // Needs nsz: (fsub 0, 0) is +0.0 but (fneg 0) is -0.0.
  if (N0CFP && N0CFP->isZero()) {
    if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
      // Prefer absorbing the negation into B's expression when that is free;
      // otherwise emit an explicit FNEG if it is (or can be made) legal.
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
    }
  }

  // Reassociation folds: require reassoc+nsz flags or global unsafe math.
  if ((Options.UnsafeFPMath ||
      (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
      && N1.getOpcode() == ISD::FADD) {
    // X - (X + Y) -> -Y
    if (N0 == N1->getOperand(0))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
    // X - (Y + X) -> -Y
    if (N0 == N1->getOperand(1))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
  }

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
11004 
/// Try to simplify an FMUL node. Value-changing folds are gated on global
/// UnsafeFPMath or the node's fast-math flags.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Per-operand scalar (or splat-vector) FP constants, if present.
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // The next fold needs nnan (0 * NaN is NaN) and nsz (0 * -A is -0.0),
  // unless global unsafe math covers both.
  if (Options.UnsafeFPMath ||
      (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
    // fold (fmul A, 0) -> 0
    if (N1CFP && N1CFP->isZero())
      return N1;
  }

  // Reassociation folds: change rounding, so need reassoc or unsafe math.
  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);

      // Check 1: Make sure that the first operand of the inner multiply is NOT
      // a constant. Otherwise, we may induce infinite looping.
      if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
        // Check 2: Make sure that the second operand of the inner multiply and
        // the second operand of the outer multiply are constants.
        if ((N1CFP && isConstOrConstSplatFP(N01)) ||
            (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
        }
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  // isNegatibleForFree returns 2 when negating is strictly cheaper; only
  // fold if at least one side profits, to avoid flip-flopping combines.
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Normalize so Select is the select and X is the other multiplicand.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // Only handle (setcc X, 0.0, cc) as the select condition.
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      // For less-than predicates, swap the select arms so the cases below
      // can treat everything as a greater-than comparison.
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        std::swap(TrueOpnd, FalseOpnd);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                   DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
11156 
/// Try to simplify an FMA node.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // FMA nodes have flags that propagate to the created nodes.
  const SDNodeFlags Flags = N->getFlags();
  // Value-changing folds are allowed under global unsafe math, or when this
  // FMA was formed by contraction (per isContractable).
  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);

  // Constant fold FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  // (fma 0, x, y) and (fma x, 0, y) -> y, ignoring the sign/NaN effects of
  // the dropped multiply; only valid under the unsafe/contract gate above.
  if (UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // (fma 1, x, y) -> (fadd x, y); (fma x, 1, y) -> (fadd x, y).
  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  if (UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x -K, y
    // Only when the negated constant is itself cheap to materialize.
    if (N0.getOpcode() == ISD::FNEG &&
        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
    }
  }

  if (UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  return SDValue();
}
11261 
11262 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11263 // reciprocal.
11264 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
11265 // Notice that this is not always beneficial. One reason is different targets
11266 // may have different costs for FDIV and FMUL, so sometimes the cost of two
11267 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
11268 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // The transform changes rounding behavior, so it requires global unsafe
  // math or this node's 'arcp' (allow reciprocal) flag.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  if (!UnsafeMath && !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal. It already has the form we would
  // create below, so rewriting it would make no progress.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();
  if (!MinUses || N1->use_size() < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if (Users.size() < MinUses)
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  // N itself is one of the users replaced by CombineTo above; returning it
  // tells the caller that a replacement happened.
  return SDValue(N, 0);  // N was replaced.
}
11325 
/// Try to simplify an FDIV node. Reciprocal and rsqrt-estimate folds are
/// gated on global UnsafeFPMath or the node's 'arcp' fast-math flag.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  // Re-emitting the node with two constant operands lets getNode fold it.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    // X / sqrt(Y) -> X * rsqrt(Y)
    if (N1.getOpcode() == ISD::FSQRT) {
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Same, but build the estimate in the narrower source type, then
      // extend the result.
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Same, but build the estimate in the wider source type, then round.
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  // isNegatibleForFree returns 2 when negating is strictly cheaper; only
  // fold if at least one side profits, to avoid flip-flopping combines.
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
    return CombineRepeatedDivisors;

  return SDValue();
}
11438 
11439 SDValue DAGCombiner::visitFREM(SDNode *N) {
11440   SDValue N0 = N->getOperand(0);
11441   SDValue N1 = N->getOperand(1);
11442   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11443   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11444   EVT VT = N->getValueType(0);
11445 
11446   // fold (frem c1, c2) -> fmod(c1,c2)
11447   if (N0CFP && N1CFP)
11448     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
11449 
11450   if (SDValue NewSel = foldBinOpIntoSelect(N))
11451     return NewSel;
11452 
11453   return SDValue();
11454 }
11455 
11456 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
11457   SDNodeFlags Flags = N->getFlags();
11458   if (!DAG.getTarget().Options.UnsafeFPMath &&
11459       !Flags.hasApproximateFuncs())
11460     return SDValue();
11461 
11462   SDValue N0 = N->getOperand(0);
11463   if (TLI.isFsqrtCheap(N0, DAG))
11464     return SDValue();
11465 
11466   // FSQRT nodes have flags that propagate to the created nodes.
11467   return buildSqrtEstimate(N0, Flags);
11468 }
11469 
11470 /// copysign(x, fp_extend(y)) -> copysign(x, y)
11471 /// copysign(x, fp_round(y)) -> copysign(x, y)
11472 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
11473   SDValue N1 = N->getOperand(1);
11474   if ((N1.getOpcode() == ISD::FP_EXTEND ||
11475        N1.getOpcode() == ISD::FP_ROUND)) {
11476     // Do not optimize out type conversion of f128 type yet.
11477     // For some targets like x86_64, configuration is changed to keep one f128
11478     // value in one SSE register, but instruction selection cannot handle
11479     // FCOPYSIGN on SSE registers yet.
11480     EVT N1VT = N1->getValueType(0);
11481     EVT N1Op0VT = N1->getOperand(0).getValueType();
11482     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
11483   }
11484   return false;
11485 }
11486 
11487 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
11488   SDValue N0 = N->getOperand(0);
11489   SDValue N1 = N->getOperand(1);
11490   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11491   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11492   EVT VT = N->getValueType(0);
11493 
11494   if (N0CFP && N1CFP) // Constant fold
11495     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
11496 
11497   if (N1CFP) {
11498     const APFloat &V = N1CFP->getValueAPF();
11499     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
11500     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
11501     if (!V.isNegative()) {
11502       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
11503         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11504     } else {
11505       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11506         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
11507                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
11508     }
11509   }
11510 
11511   // copysign(fabs(x), y) -> copysign(x, y)
11512   // copysign(fneg(x), y) -> copysign(x, y)
11513   // copysign(copysign(x,z), y) -> copysign(x, y)
11514   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
11515       N0.getOpcode() == ISD::FCOPYSIGN)
11516     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
11517 
11518   // copysign(x, abs(y)) -> abs(x)
11519   if (N1.getOpcode() == ISD::FABS)
11520     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11521 
11522   // copysign(x, copysign(y,z)) -> copysign(x, z)
11523   if (N1.getOpcode() == ISD::FCOPYSIGN)
11524     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
11525 
11526   // copysign(x, fp_extend(y)) -> copysign(x, y)
11527   // copysign(x, fp_round(y)) -> copysign(x, y)
11528   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
11529     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
11530 
11531   return SDValue();
11532 }
11533 
11534 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
11535                                const TargetLowering &TLI) {
11536   // This optimization is guarded by a function attribute because it may produce
11537   // unexpected results. Ie, programs may be relying on the platform-specific
11538   // undefined behavior when the float-to-int conversion overflows.
11539   const Function &F = DAG.getMachineFunction().getFunction();
11540   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
11541   if (StrictOverflow.getValueAsString().equals("false"))
11542     return SDValue();
11543 
11544   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
11545   // replacing casts with a libcall. We also must be allowed to ignore -0.0
11546   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
11547   // conversions would return +0.0.
11548   // FIXME: We should be able to use node-level FMF here.
11549   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
11550   EVT VT = N->getValueType(0);
11551   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
11552       !DAG.getTarget().Options.NoSignedZerosFPMath)
11553     return SDValue();
11554 
11555   // fptosi/fptoui round towards zero, so converting from FP to integer and
11556   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
11557   SDValue N0 = N->getOperand(0);
11558   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
11559       N0.getOperand(0).getValueType() == VT)
11560     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11561 
11562   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
11563       N0.getOperand(0).getValueType() == VT)
11564     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11565 
11566   return SDValue();
11567 }
11568 
/// Try to simplify a SINT_TO_FP node.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    // An i1 '1' sign-extends to -1, so a true setcc converts to -1.0.
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      // select_cc operands: lhs, rhs, true-value, false-value, cond code.
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0,, cc)
    // The zext makes the true value 1, so the FP true value is +1.0 here.
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  // int-to-fp of fp-to-int may fold to ftrunc; see foldFPToIntToFP.
  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
11625 
/// Combine a UINT_TO_FP node: constant-fold, switch to SINT_TO_FP when the
/// sign bit is known clear, turn setcc sources into a SELECT_CC of FP
/// constants, and finally try the int->FP->int ftrunc fold.
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
    // Unsigned 'true' is 1, so the converted true value is +1.0 (unlike the
    // -1.0 used in the signed i1 variant in visitSINT_TO_FP).
    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  // [us]itofp (fpto[us]i X) --> ftrunc X, when legal (see foldFPToIntToFP).
  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
11667 
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
// when the intermediate FP type is wide enough to hold the round-tripped
// integer range exactly.
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  // A sign bit does not contribute to the magnitude, hence the -1 adjustment
  // for signed types when computing the usable bit width.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Widening: extend according to the signedness of the conversions.
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    // Same scalar size: a bitcast covers any remaining type mismatch.
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
11709 
11710 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
11711   SDValue N0 = N->getOperand(0);
11712   EVT VT = N->getValueType(0);
11713 
11714   // fold (fp_to_sint c1fp) -> c1
11715   if (isConstantFPBuildVectorOrConstantFP(N0))
11716     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
11717 
11718   return FoldIntToFPToInt(N, DAG);
11719 }
11720 
11721 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
11722   SDValue N0 = N->getOperand(0);
11723   EVT VT = N->getValueType(0);
11724 
11725   // fold (fp_to_uint c1fp) -> c1
11726   if (isConstantFPBuildVectorOrConstantFP(N0))
11727     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
11728 
11729   return FoldIntToFPToInt(N, DAG);
11730 }
11731 
/// Combine an FP_ROUND node. Operand 1 is the standard FP_ROUND flag: the
/// constant 1 asserts the round changes no bits of the value (see
/// ISDOpcodes.h).
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      // The folded round is value-preserving only if both originals were.
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Rounding only the magnitude operand is fine; Y's sign is reapplied after.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
11787 
11788 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
11789   SDValue N0 = N->getOperand(0);
11790   EVT VT = N->getValueType(0);
11791   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11792   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11793 
11794   // fold (fp_round_inreg c1fp) -> c1fp
11795   if (N0CFP && isTypeLegal(EVT)) {
11796     SDLoc DL(N);
11797     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
11798     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
11799   }
11800 
11801   return SDValue();
11802 }
11803 
/// Combine an FP_EXTEND node.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  // (only when FP16_TO_FP is Legal for the wider VT)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      // Net effect is still a narrowing; forward the original round's
      // value-preserving flag.
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Remaining users of the narrow load get a value-preserving (flag = 1)
    // round of the extending load; chain users are routed to the new load's
    // chain result.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
11856 
11857 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
11858   SDValue N0 = N->getOperand(0);
11859   EVT VT = N->getValueType(0);
11860 
11861   // fold (fceil c1) -> fceil(c1)
11862   if (isConstantFPBuildVectorOrConstantFP(N0))
11863     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
11864 
11865   return SDValue();
11866 }
11867 
11868 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
11869   SDValue N0 = N->getOperand(0);
11870   EVT VT = N->getValueType(0);
11871 
11872   // fold (ftrunc c1) -> ftrunc(c1)
11873   if (isConstantFPBuildVectorOrConstantFP(N0))
11874     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
11875 
11876   // fold ftrunc (known rounded int x) -> x
11877   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
11878   // likely to be generated to extract integer from a rounded floating value.
11879   switch (N0.getOpcode()) {
11880   default: break;
11881   case ISD::FRINT:
11882   case ISD::FTRUNC:
11883   case ISD::FNEARBYINT:
11884   case ISD::FFLOOR:
11885   case ISD::FCEIL:
11886     return N0;
11887   }
11888 
11889   return SDValue();
11890 }
11891 
11892 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
11893   SDValue N0 = N->getOperand(0);
11894   EVT VT = N->getValueType(0);
11895 
11896   // fold (ffloor c1) -> ffloor(c1)
11897   if (isConstantFPBuildVectorOrConstantFP(N0))
11898     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
11899 
11900   return SDValue();
11901 }
11902 
// FIXME: FNEG and FABS have a lot in common; refactor.
/// Combine an FNEG node.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the negation can be folded into N0's expression for free, emit the
  // negated expression instead of an explicit fneg.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    // Only handle a scalar integer source; vectors of integers are bitcast
    // differently and are not handled here.
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      // Flipping the sign bit in the integer domain negates the FP value.
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only after DAG legalization, and only if the negated constant is
      // itself cheap to materialize on this target.
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}
11961 
11962 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
11963   SDValue N0 = N->getOperand(0);
11964   SDValue N1 = N->getOperand(1);
11965   EVT VT = N->getValueType(0);
11966   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11967   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11968 
11969   if (N0CFP && N1CFP) {
11970     const APFloat &C0 = N0CFP->getValueAPF();
11971     const APFloat &C1 = N1CFP->getValueAPF();
11972     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
11973   }
11974 
11975   // Canonicalize to constant on RHS.
11976   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11977      !isConstantFPBuildVectorOrConstantFP(N1))
11978     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
11979 
11980   return SDValue();
11981 }
11982 
11983 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
11984   SDValue N0 = N->getOperand(0);
11985   SDValue N1 = N->getOperand(1);
11986   EVT VT = N->getValueType(0);
11987   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11988   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11989 
11990   if (N0CFP && N1CFP) {
11991     const APFloat &C0 = N0CFP->getValueAPF();
11992     const APFloat &C1 = N1CFP->getValueAPF();
11993     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
11994   }
11995 
11996   // Canonicalize to constant on RHS.
11997   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11998      !isConstantFPBuildVectorOrConstantFP(N1))
11999     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
12000 
12001   return SDValue();
12002 }
12003 
12004 SDValue DAGCombiner::visitFABS(SDNode *N) {
12005   SDValue N0 = N->getOperand(0);
12006   EVT VT = N->getValueType(0);
12007 
12008   // fold (fabs c1) -> fabs(c1)
12009   if (isConstantFPBuildVectorOrConstantFP(N0))
12010     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12011 
12012   // fold (fabs (fabs x)) -> (fabs x)
12013   if (N0.getOpcode() == ISD::FABS)
12014     return N->getOperand(0);
12015 
12016   // fold (fabs (fneg x)) -> (fabs x)
12017   // fold (fabs (fcopysign x, y)) -> (fabs x)
12018   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
12019     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
12020 
12021   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
12022   // constant pool values.
12023   if (!TLI.isFAbsFree(VT) &&
12024       N0.getOpcode() == ISD::BITCAST &&
12025       N0.getNode()->hasOneUse()) {
12026     SDValue Int = N0.getOperand(0);
12027     EVT IntVT = Int.getValueType();
12028     if (IntVT.isInteger() && !IntVT.isVector()) {
12029       APInt SignMask;
12030       if (N0.getValueType().isVector()) {
12031         // For a vector, get a mask such as 0x7f... per scalar element
12032         // and splat it.
12033         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
12034         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12035       } else {
12036         // For a scalar, just generate 0x7f...
12037         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
12038       }
12039       SDLoc DL(N0);
12040       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
12041                         DAG.getConstant(SignMask, DL, IntVT));
12042       AddToWorklist(Int.getNode());
12043       return DAG.getBitcast(N->getValueType(0), Int);
12044     }
12045   }
12046 
12047   return SDValue();
12048 }
12049 
12050 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
12051   SDValue Chain = N->getOperand(0);
12052   SDValue N1 = N->getOperand(1);
12053   SDValue N2 = N->getOperand(2);
12054 
12055   // If N is a constant we could fold this into a fallthrough or unconditional
12056   // branch. However that doesn't happen very often in normal code, because
12057   // Instcombine/SimplifyCFG should have handled the available opportunities.
12058   // If we did this folding here, it would be necessary to update the
12059   // MachineBasicBlock CFG, which is awkward.
12060 
12061   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
12062   // on the target.
12063   if (N1.getOpcode() == ISD::SETCC &&
12064       TLI.isOperationLegalOrCustom(ISD::BR_CC,
12065                                    N1.getOperand(0).getValueType())) {
12066     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12067                        Chain, N1.getOperand(2),
12068                        N1.getOperand(0), N1.getOperand(1), N2);
12069   }
12070 
12071   if (N1.hasOneUse()) {
12072     if (SDValue NewN1 = rebuildSetCC(N1))
12073       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
12074   }
12075 
12076   return SDValue();
12077 }
12078 
12079 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
12080   if (N.getOpcode() == ISD::SRL ||
12081       (N.getOpcode() == ISD::TRUNCATE &&
12082        (N.getOperand(0).hasOneUse() &&
12083         N.getOperand(0).getOpcode() == ISD::SRL))) {
12084     // Look pass the truncate.
12085     if (N.getOpcode() == ISD::TRUNCATE)
12086       N = N.getOperand(0);
12087 
12088     // Match this pattern so that we can generate simpler code:
12089     //
12090     //   %a = ...
12091     //   %b = and i32 %a, 2
12092     //   %c = srl i32 %b, 1
12093     //   brcond i32 %c ...
12094     //
12095     // into
12096     //
12097     //   %a = ...
12098     //   %b = and i32 %a, 2
12099     //   %c = setcc eq %b, 0
12100     //   brcond %c ...
12101     //
12102     // This applies only when the AND constant value has one bit set and the
12103     // SRL constant is equal to the log2 of the AND constant. The back-end is
12104     // smart enough to convert the result into a TEST/JMP sequence.
12105     SDValue Op0 = N.getOperand(0);
12106     SDValue Op1 = N.getOperand(1);
12107 
12108     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
12109       SDValue AndOp1 = Op0.getOperand(1);
12110 
12111       if (AndOp1.getOpcode() == ISD::Constant) {
12112         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
12113 
12114         if (AndConst.isPowerOf2() &&
12115             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
12116           SDLoc DL(N);
12117           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
12118                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
12119                               ISD::SETNE);
12120         }
12121       }
12122     }
12123   }
12124 
12125   // Transform br(xor(x, y)) -> br(x != y)
12126   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
12127   if (N.getOpcode() == ISD::XOR) {
12128     // Because we may call this on a speculatively constructed
12129     // SimplifiedSetCC Node, we need to simplify this node first.
12130     // Ideally this should be folded into SimplifySetCC and not
12131     // here. For now, grab a handle to N so we don't lose it from
12132     // replacements interal to the visit.
12133     HandleSDNode XORHandle(N);
12134     while (N.getOpcode() == ISD::XOR) {
12135       SDValue Tmp = visitXOR(N.getNode());
12136       // No simplification done.
12137       if (!Tmp.getNode())
12138         break;
12139       // Returning N is form in-visit replacement that may invalidated
12140       // N. Grab value from Handle.
12141       if (Tmp.getNode() == N.getNode())
12142         N = XORHandle.getValue();
12143       else // Node simplified. Try simplifying again.
12144         N = Tmp;
12145     }
12146 
12147     if (N.getOpcode() != ISD::XOR)
12148       return N;
12149 
12150     SDNode *TheXor = N.getNode();
12151 
12152     SDValue Op0 = TheXor->getOperand(0);
12153     SDValue Op1 = TheXor->getOperand(1);
12154 
12155     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
12156       bool Equal = false;
12157       if (isOneConstant(Op0) && Op0.hasOneUse() &&
12158           Op0.getOpcode() == ISD::XOR) {
12159         TheXor = Op0.getNode();
12160         Equal = true;
12161       }
12162 
12163       EVT SetCCVT = N.getValueType();
12164       if (LegalTypes)
12165         SetCCVT = getSetCCResultType(SetCCVT);
12166       // Replace the uses of XOR with SETCC
12167       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
12168                           Equal ? ISD::SETEQ : ISD::SETNE);
12169     }
12170   }
12171 
12172   return SDValue();
12173 }
12174 
12175 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
12176 //
12177 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
12178   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
12179   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
12180 
12181   // If N is a constant we could fold this into a fallthrough or unconditional
12182   // branch. However that doesn't happen very often in normal code, because
12183   // Instcombine/SimplifyCFG should have handled the available opportunities.
12184   // If we did this folding here, it would be necessary to update the
12185   // MachineBasicBlock CFG, which is awkward.
12186 
12187   // Use SimplifySetCC to simplify SETCC's.
12188   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
12189                                CondLHS, CondRHS, CC->get(), SDLoc(N),
12190                                false);
12191   if (Simp.getNode()) AddToWorklist(Simp.getNode());
12192 
12193   // fold to a simpler setcc
12194   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
12195     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12196                        N->getOperand(0), Simp.getOperand(2),
12197                        Simp.getOperand(0), Simp.getOperand(1),
12198                        N->getOperand(4));
12199 
12200   return SDValue();
12201 }
12202 
12203 /// Return true if 'Use' is a load or a store that uses N as its base pointer
12204 /// and that N may be folded in the load / store addressing mode.
12205 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
12206                                     SelectionDAG &DAG,
12207                                     const TargetLowering &TLI) {
12208   EVT VT;
12209   unsigned AS;
12210 
12211   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
12212     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
12213       return false;
12214     VT = LD->getMemoryVT();
12215     AS = LD->getAddressSpace();
12216   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
12217     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
12218       return false;
12219     VT = ST->getMemoryVT();
12220     AS = ST->getAddressSpace();
12221   } else
12222     return false;
12223 
12224   TargetLowering::AddrMode AM;
12225   if (N->getOpcode() == ISD::ADD) {
12226     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12227     if (Offset)
12228       // [reg +/- imm]
12229       AM.BaseOffs = Offset->getSExtValue();
12230     else
12231       // [reg +/- reg]
12232       AM.Scale = 1;
12233   } else if (N->getOpcode() == ISD::SUB) {
12234     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12235     if (Offset)
12236       // [reg +/- imm]
12237       AM.BaseOffs = -Offset->getSExtValue();
12238     else
12239       // [reg +/- reg]
12240       AM.Scale = 1;
12241   } else
12242     return false;
12243 
12244   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
12245                                    VT.getTypeForEVT(*DAG.getContext()), AS);
12246 }
12247 
12248 /// Try turning a load/store into a pre-indexed load/store when the base
12249 /// pointer is an add or subtract and it has other uses besides the load/store.
12250 /// After the transformation, the new indexed load/store has effectively folded
12251 /// the add/subtract in and all of its other uses are redirected to the
12252 /// new load/store.
12253 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
12254   if (Level < AfterLegalizeDAG)
12255     return false;
12256 
12257   bool isLoad = true;
12258   SDValue Ptr;
12259   EVT VT;
12260   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
12261     if (LD->isIndexed())
12262       return false;
12263     VT = LD->getMemoryVT();
12264     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
12265         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
12266       return false;
12267     Ptr = LD->getBasePtr();
12268   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
12269     if (ST->isIndexed())
12270       return false;
12271     VT = ST->getMemoryVT();
12272     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
12273         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
12274       return false;
12275     Ptr = ST->getBasePtr();
12276     isLoad = false;
12277   } else {
12278     return false;
12279   }
12280 
12281   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
12282   // out.  There is no reason to make this a preinc/predec.
12283   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
12284       Ptr.getNode()->hasOneUse())
12285     return false;
12286 
12287   // Ask the target to do addressing mode selection.
12288   SDValue BasePtr;
12289   SDValue Offset;
12290   ISD::MemIndexedMode AM = ISD::UNINDEXED;
12291   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
12292     return false;
12293 
12294   // Backends without true r+i pre-indexed forms may need to pass a
12295   // constant base with a variable offset so that constant coercion
12296   // will work with the patterns in canonical form.
12297   bool Swapped = false;
12298   if (isa<ConstantSDNode>(BasePtr)) {
12299     std::swap(BasePtr, Offset);
12300     Swapped = true;
12301   }
12302 
12303   // Don't create a indexed load / store with zero offset.
12304   if (isNullConstant(Offset))
12305     return false;
12306 
12307   // Try turning it into a pre-indexed load / store except when:
12308   // 1) The new base ptr is a frame index.
12309   // 2) If N is a store and the new base ptr is either the same as or is a
12310   //    predecessor of the value being stored.
12311   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
12312   //    that would create a cycle.
12313   // 4) All uses are load / store ops that use it as old base ptr.
12314 
12315   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
12316   // (plus the implicit offset) to a register to preinc anyway.
12317   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
12318     return false;
12319 
12320   // Check #2.
12321   if (!isLoad) {
12322     SDValue Val = cast<StoreSDNode>(N)->getValue();
12323     if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
12324       return false;
12325   }
12326 
12327   // Caches for hasPredecessorHelper.
12328   SmallPtrSet<const SDNode *, 32> Visited;
12329   SmallVector<const SDNode *, 16> Worklist;
12330   Worklist.push_back(N);
12331 
12332   // If the offset is a constant, there may be other adds of constants that
12333   // can be folded with this one. We should do this to avoid having to keep
12334   // a copy of the original base pointer.
12335   SmallVector<SDNode *, 16> OtherUses;
12336   if (isa<ConstantSDNode>(Offset))
12337     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
12338                               UE = BasePtr.getNode()->use_end();
12339          UI != UE; ++UI) {
12340       SDUse &Use = UI.getUse();
12341       // Skip the use that is Ptr and uses of other results from BasePtr's
12342       // node (important for nodes that return multiple results).
12343       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
12344         continue;
12345 
12346       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
12347         continue;
12348 
12349       if (Use.getUser()->getOpcode() != ISD::ADD &&
12350           Use.getUser()->getOpcode() != ISD::SUB) {
12351         OtherUses.clear();
12352         break;
12353       }
12354 
12355       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
12356       if (!isa<ConstantSDNode>(Op1)) {
12357         OtherUses.clear();
12358         break;
12359       }
12360 
12361       // FIXME: In some cases, we can be smarter about this.
12362       if (Op1.getValueType() != Offset.getValueType()) {
12363         OtherUses.clear();
12364         break;
12365       }
12366 
12367       OtherUses.push_back(Use.getUser());
12368     }
12369 
12370   if (Swapped)
12371     std::swap(BasePtr, Offset);
12372 
12373   // Now check for #3 and #4.
12374   bool RealUse = false;
12375 
12376   for (SDNode *Use : Ptr.getNode()->uses()) {
12377     if (Use == N)
12378       continue;
12379     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
12380       return false;
12381 
12382     // If Ptr may be folded in addressing mode of other use, then it's
12383     // not profitable to do this transformation.
12384     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
12385       RealUse = true;
12386   }
12387 
12388   if (!RealUse)
12389     return false;
12390 
12391   SDValue Result;
12392   if (isLoad)
12393     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
12394                                 BasePtr, Offset, AM);
12395   else
12396     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
12397                                  BasePtr, Offset, AM);
12398   ++PreIndexedNodes;
12399   ++NodesCombined;
12400   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
12401              Result.getNode()->dump(&DAG); dbgs() << '\n');
12402   WorklistRemover DeadNodes(*this);
12403   if (isLoad) {
12404     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
12405     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
12406   } else {
12407     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
12408   }
12409 
12410   // Finally, since the node is now dead, remove it from the graph.
12411   deleteAndRecombine(N);
12412 
12413   if (Swapped)
12414     std::swap(BasePtr, Offset);
12415 
12416   // Replace other uses of BasePtr that can be updated to use Ptr
12417   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
12418     unsigned OffsetIdx = 1;
12419     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
12420       OffsetIdx = 0;
12421     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
12422            BasePtr.getNode() && "Expected BasePtr operand");
12423 
12424     // We need to replace ptr0 in the following expression:
12425     //   x0 * offset0 + y0 * ptr0 = t0
12426     // knowing that
12427     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
12428     //
12429     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
12430     // indexed load/store and the expression that needs to be re-written.
12431     //
12432     // Therefore, we have:
12433     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
12434 
12435     ConstantSDNode *CN =
12436       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
12437     int X0, X1, Y0, Y1;
12438     const APInt &Offset0 = CN->getAPIntValue();
12439     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
12440 
12441     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
12442     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
12443     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
12444     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
12445 
12446     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
12447 
12448     APInt CNV = Offset0;
12449     if (X0 < 0) CNV = -CNV;
12450     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
12451     else CNV = CNV - Offset1;
12452 
12453     SDLoc DL(OtherUses[i]);
12454 
12455     // We can now generate the new expression.
12456     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
12457     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
12458 
12459     SDValue NewUse = DAG.getNode(Opcode,
12460                                  DL,
12461                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
12462     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
12463     deleteAndRecombine(OtherUses[i]);
12464   }
12465 
12466   // Replace the uses of Ptr with uses of the updated base value.
12467   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
12468   deleteAndRecombine(Ptr.getNode());
12469   AddToWorklist(Result.getNode());
12470 
12471   return true;
12472 }
12473 
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folds the add/subtract into the
/// new indexed load/store, and all uses of the add/subtract are redirected to
/// the new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Only attempt this after the DAG is fully legalized, so the indexed-mode
  // legality queries below see the final memory VTs.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  // Only plain (unindexed) loads/stores whose memory VT supports a post-inc
  // or post-dec addressing mode on this target are candidates.
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // The base pointer must have at least one other use (the add/sub we want to
  // fold); a single-use pointer cannot form a post-increment.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Scan the other uses of the base pointer for a foldable ADD/SUB.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Skip frame-index and register base pointers (same restriction as
      // check #1 of the pre-indexed combine above).
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          // No use survives outside an addressing mode: this ADD/SUB would be
          // folded away anyway, so give up on this candidate.
          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
                   dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          // Map the old load's value (res 0) and chain (res 1) onto the
          // indexed load's value (res 0) and chain (res 2).
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          // Map the old store's chain (res 0) onto the indexed store's
          // chain (res 1).
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
12591 
12592 /// Return the base-pointer arithmetic from an indexed \p LD.
12593 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
12594   ISD::MemIndexedMode AM = LD->getAddressingMode();
12595   assert(AM != ISD::UNINDEXED);
12596   SDValue BP = LD->getOperand(1);
12597   SDValue Inc = LD->getOperand(2);
12598 
12599   // Some backends use TargetConstants for load offsets, but don't expect
12600   // TargetConstants in general ADD nodes. We can convert these constants into
12601   // regular Constants (if the constant is not opaque).
12602   assert((Inc.getOpcode() != ISD::TargetConstant ||
12603           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
12604          "Cannot split out indexing using opaque target constants");
12605   if (Inc.getOpcode() == ISD::TargetConstant) {
12606     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
12607     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
12608                           ConstInc->getValueType(0));
12609   }
12610 
12611   unsigned Opc =
12612       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
12613   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
12614 }
12615 
// Main combine entry point for LOAD nodes: deletes dead loads, forwards
// stored values, refines alignment, improves chains, and tries indexed and
// sliced forms.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // Dead loaded value: replace value with undef, and either split the
      // base-pointer update out of the load or replace it with undef too.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (OptLevel != CodeGenOpt::None &&
      ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, PrevST->getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid the dependency on the old chain.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
12748 
12749 namespace {
12750 
12751 /// Helper structure used to slice a load in smaller loads.
12752 /// Basically a slice is obtained from the following sequence:
12753 /// Origin = load Ty1, Base
12754 /// Shift = srl Ty1 Origin, CstTy Amount
12755 /// Inst = trunc Shift to Ty2
12756 ///
12757 /// Then, it will be rewritten into:
12758 /// Slice = load SliceTy, Base + SliceOffset
12759 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
12760 ///
12761 /// SliceTy is deduced from the number of bits that are actually used to
12762 /// build Inst.
struct LoadedSlice {
  /// Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;

    /// Various cost.
    // Counters for each kind of operation the sliced code would contain.
    unsigned Loads = 0;
    unsigned Truncates = 0;
    unsigned CrossRegisterBanksCopies = 0;
    unsigned ZExts = 0;
    unsigned Shift = 0;

    // Empty cost: nothing has been accounted for yet.
    Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}

    /// Get the cost of one isolated slice.
    // One load, plus a zext when the slice's load type must be widened to the
    // truncated type and the target does not consider that zext free.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// Account for slicing gain in the current cost.
    /// Slicing provide a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };

  // The last instruction that represent the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// Get the bits used by this slice within the original loaded value.
  /// \return Result is as wide as the original load and has used bits set
  ///         to 1 and not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // The slice loads from Base + Offset, so its alignment is bounded by
    // both the original alignment and the offset's own alignment.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the low bits live at the high addresses, so the
    // byte offset is mirrored within the original value.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset),
                     getAlignment(), Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    // The slice must feed exactly one user, and that user must be a bitcast.
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
13054 
13055 } // end anonymous namespace
13056 
13057 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
13058 /// \p UsedBits looks like 0..0 1..1 0..0.
13059 static bool areUsedBitsDense(const APInt &UsedBits) {
13060   // If all the bits are one, this is dense!
13061   if (UsedBits.isAllOnesValue())
13062     return true;
13063 
13064   // Get rid of the unused bits on the right.
13065   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
13066   // Get rid of the unused bits on the left.
13067   if (NarrowedUsedBits.countLeadingZeros())
13068     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
13069   // Check that the chunk of bits is completely used.
13070   return NarrowedUsedBits.isAllOnesValue();
13071 }
13072 
13073 /// Check whether or not \p First and \p Second are next to each other
13074 /// in memory. This means that there is no hole between the bits loaded
13075 /// by \p First and the bits loaded by \p Second.
13076 static bool areSlicesNextToEachOther(const LoadedSlice &First,
13077                                      const LoadedSlice &Second) {
13078   assert(First.Origin == Second.Origin && First.Origin &&
13079          "Unable to match different memory origins.");
13080   APInt UsedBits = First.getUsedBits();
13081   assert((UsedBits & Second.getUsedBits()) == 0 &&
13082          "Slices are not supposed to overlap.");
13083   UsedBits |= Second.getUsedBits();
13084   return areUsedBitsDense(UsedBits);
13085 }
13086 
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are in the slices in \p LoadedSlices.
13091 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13092                                  LoadedSlice::Cost &GlobalLSCost) {
13093   unsigned NumberOfSlices = LoadedSlices.size();
13094   // If there is less than 2 elements, no pairing is possible.
13095   if (NumberOfSlices < 2)
13096     return;
13097 
13098   // Sort the slices so that elements that are likely to be next to each
13099   // other in memory are next to each other in the list.
13100   llvm::sort(LoadedSlices.begin(), LoadedSlices.end(),
13101              [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
13102     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
13103     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
13104   });
13105   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
13106   // First (resp. Second) is the first (resp. Second) potentially candidate
13107   // to be placed in a paired load.
13108   const LoadedSlice *First = nullptr;
13109   const LoadedSlice *Second = nullptr;
13110   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
13111                 // Set the beginning of the pair.
13112                                                            First = Second) {
13113     Second = &LoadedSlices[CurrSlice];
13114 
13115     // If First is NULL, it means we start a new pair.
13116     // Get to the next slice.
13117     if (!First)
13118       continue;
13119 
13120     EVT LoadedType = First->getLoadedType();
13121 
13122     // If the types of the slices are different, we cannot pair them.
13123     if (LoadedType != Second->getLoadedType())
13124       continue;
13125 
13126     // Check if the target supplies paired loads for this type.
13127     unsigned RequiredAlignment = 0;
13128     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
13129       // move to the next pair, this type is hopeless.
13130       Second = nullptr;
13131       continue;
13132     }
13133     // Check if we meet the alignment requirement.
13134     if (RequiredAlignment > First->getAlignment())
13135       continue;
13136 
13137     // Check that both loads are next to each other in memory.
13138     if (!areSlicesNextToEachOther(*First, *Second))
13139       continue;
13140 
13141     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
13142     --GlobalLSCost.Loads;
13143     // Move to the next pair.
13144     Second = nullptr;
13145   }
13146 }
13147 
13148 /// Check the profitability of all involved LoadedSlice.
13149 /// Currently, it is considered profitable if there is exactly two
13150 /// involved slices (1) which are (2) next to each other in memory, and
13151 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
13152 ///
13153 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
13154 /// the elements themselves.
13155 ///
13156 /// FIXME: When the cost model will be mature enough, we can relax
13157 /// constraints (1) and (2).
13158 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13159                                 const APInt &UsedBits, bool ForCodeSize) {
13160   unsigned NumberOfSlices = LoadedSlices.size();
13161   if (StressLoadSlicing)
13162     return NumberOfSlices > 1;
13163 
13164   // Check (1).
13165   if (NumberOfSlices != 2)
13166     return false;
13167 
13168   // Check (2).
13169   if (!areUsedBitsDense(UsedBits))
13170     return false;
13171 
13172   // Check (3).
13173   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
13174   // The original code has one big load.
13175   OrigCost.Loads = 1;
13176   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
13177     const LoadedSlice &LS = LoadedSlices[CurrSlice];
13178     // Accumulate the cost of all the slices.
13179     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
13180     GlobalSlicingCost += SliceCost;
13181 
13182     // Account as cost in the original configuration the gain obtained
13183     // with the current slices.
13184     OrigCost.addSliceGain(LS);
13185   }
13186 
13187   // If the target supports paired load, adjust the cost accordingly.
13188   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
13189   return OrigCost > GlobalSlicingCost;
13190 }
13191 
/// If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  // Only slice once the DAG has been fully legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Only normal (non-extending, unindexed), non-volatile integer loads are
  // candidates.
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr): look through a single-use SRL by a
    // constant amount before the expected truncate.
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      User = *User->use_begin();
    }

    // At this point, User is a Truncate, iff we encountered, trunc or
    // trunc(lshr).
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and greater than 8-bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bits multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice() may have wrapped the load (e.g. in a zext); peel that off
    // to reach the load itself so we can grab its chain result.
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Tie the chains of all the slice loads together and make that token
  // factor stand in for the chain result of the original load.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}
13293 
/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
/// \returns (number of masked bytes, byte shift of the mask); (0, 0) means
///          the pattern did not match.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // (0, 0) is the failure result.
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  // NOTE(review): ISD::isNormalLoad does not exclude volatile loads --
  // confirm callers cannot reach here with a volatile load, since the
  // transformation makes the load dead.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  // For narrowing to be valid, it must be the case that the load is the
  // immediately preceding memory operation before the store.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() == ISD::TokenFactor &&
           SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use so there are no indirect dependencies.
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk)
      return Result;
  } else
    return Result; // Fail.

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}
13369 
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
/// \param MaskInfo (number of masked bytes, byte shift), as computed by
///        CheckForMaskedLoad.
/// \returns the new narrow store node, or nullptr if the transformation
///          does not apply.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On little-endian targets the masked bytes sit ByteShift bytes above the
  // base; on big-endian the offset is mirrored within the stored value.
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    // Offsetting the pointer may reduce the provable alignment.
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign)
      .getNode();
}
13429 
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only plain scalar stores whose value has no other users are candidates.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // The remaining patterns require (op (load), constant).
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // The load must feed the op directly, be the store's chain predecessor,
  // and have no other users of its value.
  // NOTE(review): ISD::isNormalLoad does not exclude volatile loads, and a
  // narrowed load would change the width of the access -- confirm volatile
  // loads cannot reach this path.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // Invert AND masks so that, for all three opcodes, the set bits of Imm
    // are exactly the bits the op may change.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only proceed if all changed bits fit inside the NewBW-wide window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrow load / op / store sequence at the adjusted offset.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Reroute users of the old load's chain to the new narrow load.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
13555 
13556 /// For a given floating point load / store pair, if the load value isn't used
13557 /// by any other operations, then consider transforming the pair to integer
13558 /// load / store operations if the target deems the transformation profitable.
13559 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
13560   StoreSDNode *ST  = cast<StoreSDNode>(N);
13561   SDValue Chain = ST->getChain();
13562   SDValue Value = ST->getValue();
13563   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
13564       Value.hasOneUse() &&
13565       Chain == SDValue(Value.getNode(), 1)) {
13566     LoadSDNode *LD = cast<LoadSDNode>(Value);
13567     EVT VT = LD->getMemoryVT();
13568     if (!VT.isFloatingPoint() ||
13569         VT != ST->getMemoryVT() ||
13570         LD->isNonTemporal() ||
13571         ST->isNonTemporal() ||
13572         LD->getPointerInfo().getAddrSpace() != 0 ||
13573         ST->getPointerInfo().getAddrSpace() != 0)
13574       return SDValue();
13575 
13576     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
13577     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
13578         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
13579         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
13580         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
13581       return SDValue();
13582 
13583     unsigned LDAlign = LD->getAlignment();
13584     unsigned STAlign = ST->getAlignment();
13585     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
13586     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
13587     if (LDAlign < ABIAlign || STAlign < ABIAlign)
13588       return SDValue();
13589 
13590     SDValue NewLD =
13591         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
13592                     LD->getPointerInfo(), LDAlign);
13593 
13594     SDValue NewST =
13595         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
13596                      ST->getPointerInfo(), STAlign);
13597 
13598     AddToWorklist(NewLD.getNode());
13599     AddToWorklist(NewST.getNode());
13600     WorklistRemover DeadNodes(*this);
13601     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
13602     ++LdStFP2Int;
13603     return NewST;
13604   }
13605 
13606   return SDValue();
13607 }
13608 
13609 // This is a helper function for visitMUL to check the profitability
13610 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
13611 // MulNode is the original multiply, AddNode is (add x, c1),
13612 // and ConstNode is c2.
13613 //
13614 // If the (add x, c1) has multiple uses, we could increase
13615 // the number of adds if we make this transformation.
13616 // It would only be worth doing this if we can remove a
13617 // multiply in the process. Check for that here.
13618 // To illustrate:
13619 //     (A + c1) * c3
13620 //     (A + c2) * c3
13621 // We're checking for cases where we have common "c3 * A" expressions.
13622 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
13623                                               SDValue &AddNode,
13624                                               SDValue &ConstNode) {
13625   APInt Val;
13626 
13627   // If the add only has one use, this would be OK to do.
13628   if (AddNode.getNode()->hasOneUse())
13629     return true;
13630 
13631   // Walk all the users of the constant with which we're multiplying.
13632   for (SDNode *Use : ConstNode->uses()) {
13633     if (Use == MulNode) // This use is the one we're on right now. Skip it.
13634       continue;
13635 
13636     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
13637       SDNode *OtherOp;
13638       SDNode *MulVar = AddNode.getOperand(0).getNode();
13639 
13640       // OtherOp is what we're multiplying against the constant.
13641       if (Use->getOperand(0) == ConstNode)
13642         OtherOp = Use->getOperand(1).getNode();
13643       else
13644         OtherOp = Use->getOperand(0).getNode();
13645 
13646       // Check to see if multiply is with the same operand of our "add".
13647       //
13648       //     ConstNode  = CONST
13649       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
13650       //     ...
13651       //     AddNode  = (A + c1)  <-- MulVar is A.
13652       //         = AddNode * ConstNode   <-- current visiting instruction.
13653       //
13654       // If we make this transformation, we will have a common
13655       // multiply (ConstNode * A) that we can save.
13656       if (OtherOp == MulVar)
13657         return true;
13658 
13659       // Now check to see if a future expansion will give us a common
13660       // multiply.
13661       //
13662       //     ConstNode  = CONST
13663       //     AddNode    = (A + c1)
13664       //     ...   = AddNode * ConstNode <-- current visiting instruction.
13665       //     ...
13666       //     OtherOp = (A + c2)
13667       //     Use     = OtherOp * ConstNode <-- visiting Use.
13668       //
13669       // If we make this transformation, we will have a common
13670       // multiply (CONST * A) after we also do the same transformation
13671       // to the "t2" instruction.
13672       if (OtherOp->getOpcode() == ISD::ADD &&
13673           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
13674           OtherOp->getOperand(0).getNode() == MulVar)
13675         return true;
13676     }
13677   }
13678 
13679   // Didn't find a case where this would be profitable.
13680   return false;
13681 }
13682 
13683 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
13684                                          unsigned NumStores) {
13685   SmallVector<SDValue, 8> Chains;
13686   SmallPtrSet<const SDNode *, 8> Visited;
13687   SDLoc StoreDL(StoreNodes[0].MemNode);
13688 
13689   for (unsigned i = 0; i < NumStores; ++i) {
13690     Visited.insert(StoreNodes[i].MemNode);
13691   }
13692 
13693   // don't include nodes that are children
13694   for (unsigned i = 0; i < NumStores; ++i) {
13695     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
13696       Chains.push_back(StoreNodes[i].MemNode->getChain());
13697   }
13698 
13699   assert(Chains.size() > 0 && "Chain should have generated a chain");
13700   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
13701 }
13702 
13703 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
13704     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
13705     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
13706   // Make sure we have something to merge.
13707   if (NumStores < 2)
13708     return false;
13709 
13710   // The latest Node in the DAG.
13711   SDLoc DL(StoreNodes[0].MemNode);
13712 
13713   int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
13714   unsigned SizeInBits = NumStores * ElementSizeBits;
13715   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
13716 
13717   EVT StoreTy;
13718   if (UseVector) {
13719     unsigned Elts = NumStores * NumMemElts;
13720     // Get the type for the merged vector store.
13721     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
13722   } else
13723     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
13724 
13725   SDValue StoredVal;
13726   if (UseVector) {
13727     if (IsConstantSrc) {
13728       SmallVector<SDValue, 8> BuildVector;
13729       for (unsigned I = 0; I != NumStores; ++I) {
13730         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
13731         SDValue Val = St->getValue();
13732         // If constant is of the wrong type, convert it now.
13733         if (MemVT != Val.getValueType()) {
13734           Val = peekThroughBitcast(Val);
13735           // Deal with constants of wrong size.
13736           if (ElementSizeBits != Val.getValueSizeInBits()) {
13737             EVT IntMemVT =
13738                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
13739             if (isa<ConstantFPSDNode>(Val)) {
13740               // Not clear how to truncate FP values.
13741               return false;
13742             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
13743               Val = DAG.getConstant(C->getAPIntValue()
13744                                         .zextOrTrunc(Val.getValueSizeInBits())
13745                                         .zextOrTrunc(ElementSizeBits),
13746                                     SDLoc(C), IntMemVT);
13747           }
13748           // Make sure correctly size type is the correct type.
13749           Val = DAG.getBitcast(MemVT, Val);
13750         }
13751         BuildVector.push_back(Val);
13752       }
13753       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
13754                                                : ISD::BUILD_VECTOR,
13755                               DL, StoreTy, BuildVector);
13756     } else {
13757       SmallVector<SDValue, 8> Ops;
13758       for (unsigned i = 0; i < NumStores; ++i) {
13759         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13760         SDValue Val = peekThroughBitcast(St->getValue());
13761         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
13762         // type MemVT. If the underlying value is not the correct
13763         // type, but it is an extraction of an appropriate vector we
13764         // can recast Val to be of the correct type. This may require
13765         // converting between EXTRACT_VECTOR_ELT and
13766         // EXTRACT_SUBVECTOR.
13767         if ((MemVT != Val.getValueType()) &&
13768             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
13769              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
13770           SDValue Vec = Val.getOperand(0);
13771           EVT MemVTScalarTy = MemVT.getScalarType();
13772           // We may need to add a bitcast here to get types to line up.
13773           if (MemVTScalarTy != Vec.getValueType()) {
13774             unsigned Elts = Vec.getValueType().getSizeInBits() /
13775                             MemVTScalarTy.getSizeInBits();
13776             EVT NewVecTy =
13777                 EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
13778             Vec = DAG.getBitcast(NewVecTy, Vec);
13779           }
13780           auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
13781                                         : ISD::EXTRACT_VECTOR_ELT;
13782           Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
13783         }
13784         Ops.push_back(Val);
13785       }
13786 
13787       // Build the extracted vector elements back into a vector.
13788       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
13789                                                : ISD::BUILD_VECTOR,
13790                               DL, StoreTy, Ops);
13791     }
13792   } else {
13793     // We should always use a vector store when merging extracted vector
13794     // elements, so this path implies a store of constants.
13795     assert(IsConstantSrc && "Merged vector elements should use vector store");
13796 
13797     APInt StoreInt(SizeInBits, 0);
13798 
13799     // Construct a single integer constant which is made of the smaller
13800     // constant inputs.
13801     bool IsLE = DAG.getDataLayout().isLittleEndian();
13802     for (unsigned i = 0; i < NumStores; ++i) {
13803       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
13804       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
13805 
13806       SDValue Val = St->getValue();
13807       Val = peekThroughBitcast(Val);
13808       StoreInt <<= ElementSizeBits;
13809       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
13810         StoreInt |= C->getAPIntValue()
13811                         .zextOrTrunc(ElementSizeBits)
13812                         .zextOrTrunc(SizeInBits);
13813       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
13814         StoreInt |= C->getValueAPF()
13815                         .bitcastToAPInt()
13816                         .zextOrTrunc(ElementSizeBits)
13817                         .zextOrTrunc(SizeInBits);
13818         // If fp truncation is necessary give up for now.
13819         if (MemVT.getSizeInBits() != ElementSizeBits)
13820           return false;
13821       } else {
13822         llvm_unreachable("Invalid constant element type");
13823       }
13824     }
13825 
13826     // Create the new Load and Store operations.
13827     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
13828   }
13829 
13830   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13831   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
13832 
13833   // make sure we use trunc store if it's necessary to be legal.
13834   SDValue NewStore;
13835   if (!UseTrunc) {
13836     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
13837                             FirstInChain->getPointerInfo(),
13838                             FirstInChain->getAlignment());
13839   } else { // Must be realized as a trunc store
13840     EVT LegalizedStoredValTy =
13841         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
13842     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
13843     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
13844     SDValue ExtendedStoreVal =
13845         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
13846                         LegalizedStoredValTy);
13847     NewStore = DAG.getTruncStore(
13848         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
13849         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
13850         FirstInChain->getAlignment(),
13851         FirstInChain->getMemOperand()->getFlags());
13852   }
13853 
13854   // Replace all merged stores with the new store.
13855   for (unsigned i = 0; i < NumStores; ++i)
13856     CombineTo(StoreNodes[i].MemNode, NewStore);
13857 
13858   AddToWorklist(NewChain.getNode());
13859   return true;
13860 }
13861 
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SDNode *&RootNode) {
  // Populate \p StoreNodes with stores that could potentially be merged with
  // \p St: stores of the same source category (load, constant, or extracted
  // vector element) whose address shares St's base pointer. Each entry
  // records the candidate's byte offset from that base. \p RootNode is set
  // to the chain ancestor under which the candidate search was performed.
  //
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  EVT MemVT = St->getMemoryVT();

  SDValue Val = peekThroughBitcast(St->getValue());
  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return;

  // Classify St's source value; every candidate must fall in the same
  // category so that a single merged store can be formed later.
  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
  EVT LoadVT;
  if (IsLoadSrc) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld, DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      return;
    // The memory operands must not be volatile.
    if (Ld->isVolatile() || Ld->isIndexed())
      return;
  }
  // Returns true if \p Other is mergeable with St: same source category,
  // compatible memory type, and an address expressible as BasePtr plus a
  // constant byte offset. On success, \p Ptr and \p Offset describe Other's
  // address relative to BasePtr.
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    if (Other->isVolatile() || Other->isIndexed())
      return false;
    SDValue Val = peekThroughBitcast(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
        auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        // Loads must only have one use.
        if (!OtherLd->hasNUsesOfValue(1, 0))
          return false;
        // The memory operands must not be volatile.
        if (OtherLd->isVolatile() || OtherLd->isIndexed())
          return false;
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (IsConstantSrc) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
        return false;
    }
    if (IsExtractVecSrc) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(Val.getValueType()))
        return false;
      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    Ptr = BaseIndexOffset::match(Other, DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };

  // We looking for a root node which is an ancestor to all mergable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2. or Store3 where the root is not a load
  // which always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1   Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  RootNode = St->getChain().getNode();

  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    // St is chained through a load: climb one level to the load's chain and
    // look down through every sibling load (chain users of the root) for
    // stores hanging off them, as in the diagram above.
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    // Otherwise, scan the direct chain users of the root for candidate
    // stores. I.getOperandNo() == 0 restricts the walk to uses of the root
    // as a chain operand.
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}
13987 
13988 // We need to check that merging these stores does not cause a loop in
13989 // the DAG. Any store candidate may depend on another candidate
13990 // indirectly through its operand (we already consider dependencies
13991 // through the chain). Check in parallel by searching up from
13992 // non-chain operands of candidates.
13993 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
13994     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
13995     SDNode *RootNode) {
13996   // FIXME: We should be able to truncate a full search of
13997   // predecessors by doing a BFS and keeping tabs the originating
13998   // stores from which worklist nodes come from in a similar way to
13999   // TokenFactor simplfication.
14000 
14001   SmallPtrSet<const SDNode *, 32> Visited;
14002   SmallVector<const SDNode *, 8> Worklist;
14003 
14004   // RootNode is a predecessor to all candidates so we need not search
14005   // past it. Add RootNode (peeking through TokenFactors). Do not count
14006   // these towards size check.
14007 
14008   Worklist.push_back(RootNode);
14009   while (!Worklist.empty()) {
14010     auto N = Worklist.pop_back_val();
14011     if (!Visited.insert(N).second)
14012       continue; // Already present in Visited.
14013     if (N->getOpcode() == ISD::TokenFactor) {
14014       for (SDValue Op : N->ops())
14015         Worklist.push_back(Op.getNode());
14016     }
14017   }
14018 
14019   // Don't count pruning nodes towards max.
14020   unsigned int Max = 1024 + Visited.size();
14021   // Search Ops of store candidates.
14022   for (unsigned i = 0; i < NumStores; ++i) {
14023     SDNode *N = StoreNodes[i].MemNode;
14024     // Of the 4 Store Operands:
14025     //   * Chain (Op 0) -> We have already considered these
14026     //                    in candidate selection and can be
14027     //                    safely ignored
14028     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
14029     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant
14030     //                      and so no cycles are possible.
14031     //   * (Op 3) -> appears to always be undef. Cannot be source of cycle.
14032     //
14033     // Thus we need only check predecessors of the value operands.
14034     auto *Op = N->getOperand(1).getNode();
14035     if (Visited.insert(Op).second)
14036       Worklist.push_back(Op);
14037   }
14038   // Search through DAG. We can stop early if we find a store node.
14039   for (unsigned i = 0; i < NumStores; ++i)
14040     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
14041                                      Max))
14042       return false;
14043   return true;
14044 }
14045 
14046 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
14047   if (OptLevel == CodeGenOpt::None)
14048     return false;
14049 
14050   EVT MemVT = St->getMemoryVT();
14051   int64_t ElementSizeBytes = MemVT.getStoreSize();
14052   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14053 
14054   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
14055     return false;
14056 
14057   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
14058       Attribute::NoImplicitFloat);
14059 
14060   // This function cannot currently deal with non-byte-sized memory sizes.
14061   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
14062     return false;
14063 
14064   if (!MemVT.isSimple())
14065     return false;
14066 
14067   // Perform an early exit check. Do not bother looking at stored values that
14068   // are not constants, loads, or extracted vector elements.
14069   SDValue StoredVal = peekThroughBitcast(St->getValue());
14070   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
14071   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
14072                        isa<ConstantFPSDNode>(StoredVal);
14073   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14074                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14075 
14076   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
14077     return false;
14078 
14079   SmallVector<MemOpLink, 8> StoreNodes;
14080   SDNode *RootNode;
14081   // Find potential store merge candidates by searching through chain sub-DAG
14082   getStoreMergeCandidates(St, StoreNodes, RootNode);
14083 
14084   // Check if there is anything to merge.
14085   if (StoreNodes.size() < 2)
14086     return false;
14087 
14088   // Sort the memory operands according to their distance from the
14089   // base pointer.
14090   llvm::sort(StoreNodes.begin(), StoreNodes.end(),
14091              [](MemOpLink LHS, MemOpLink RHS) {
14092                return LHS.OffsetFromBase < RHS.OffsetFromBase;
14093              });
14094 
14095   // Store Merge attempts to merge the lowest stores. This generally
14096   // works out as if successful, as the remaining stores are checked
14097   // after the first collection of stores is merged. However, in the
14098   // case that a non-mergeable store is found first, e.g., {p[-2],
14099   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
14100   // mergeable cases. To prevent this, we prune such stores from the
14101   // front of StoreNodes here.
14102 
14103   bool RV = false;
14104   while (StoreNodes.size() > 1) {
14105     unsigned StartIdx = 0;
14106     while ((StartIdx + 1 < StoreNodes.size()) &&
14107            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
14108                StoreNodes[StartIdx + 1].OffsetFromBase)
14109       ++StartIdx;
14110 
14111     // Bail if we don't have enough candidates to merge.
14112     if (StartIdx + 1 >= StoreNodes.size())
14113       return RV;
14114 
14115     if (StartIdx)
14116       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
14117 
14118     // Scan the memory operations on the chain and find the first
14119     // non-consecutive store memory address.
14120     unsigned NumConsecutiveStores = 1;
14121     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
14122     // Check that the addresses are consecutive starting from the second
14123     // element in the list of stores.
14124     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
14125       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
14126       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14127         break;
14128       NumConsecutiveStores = i + 1;
14129     }
14130 
14131     if (NumConsecutiveStores < 2) {
14132       StoreNodes.erase(StoreNodes.begin(),
14133                        StoreNodes.begin() + NumConsecutiveStores);
14134       continue;
14135     }
14136 
14137     // The node with the lowest store address.
14138     LLVMContext &Context = *DAG.getContext();
14139     const DataLayout &DL = DAG.getDataLayout();
14140 
14141     // Store the constants into memory as one consecutive store.
14142     if (IsConstantSrc) {
14143       while (NumConsecutiveStores >= 2) {
14144         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14145         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14146         unsigned FirstStoreAlign = FirstInChain->getAlignment();
14147         unsigned LastLegalType = 1;
14148         unsigned LastLegalVectorType = 1;
14149         bool LastIntegerTrunc = false;
14150         bool NonZero = false;
14151         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
14152         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14153           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
14154           SDValue StoredVal = ST->getValue();
14155           bool IsElementZero = false;
14156           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
14157             IsElementZero = C->isNullValue();
14158           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
14159             IsElementZero = C->getConstantFPValue()->isNullValue();
14160           if (IsElementZero) {
14161             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
14162               FirstZeroAfterNonZero = i;
14163           }
14164           NonZero |= !IsElementZero;
14165 
14166           // Find a legal type for the constant store.
14167           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14168           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14169           bool IsFast = false;
14170 
14171           // Break early when size is too large to be legal.
14172           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14173             break;
14174 
14175           if (TLI.isTypeLegal(StoreTy) &&
14176               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14177               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14178                                      FirstStoreAlign, &IsFast) &&
14179               IsFast) {
14180             LastIntegerTrunc = false;
14181             LastLegalType = i + 1;
14182             // Or check whether a truncstore is legal.
14183           } else if (TLI.getTypeAction(Context, StoreTy) ==
14184                      TargetLowering::TypePromoteInteger) {
14185             EVT LegalizedStoredValTy =
14186                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
14187             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14188                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14189                 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14190                                        FirstStoreAlign, &IsFast) &&
14191                 IsFast) {
14192               LastIntegerTrunc = true;
14193               LastLegalType = i + 1;
14194             }
14195           }
14196 
14197           // We only use vectors if the constant is known to be zero or the
14198           // target allows it and the function is not marked with the
14199           // noimplicitfloat attribute.
14200           if ((!NonZero ||
14201                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
14202               !NoVectors) {
14203             // Find a legal type for the vector store.
14204             unsigned Elts = (i + 1) * NumMemElts;
14205             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14206             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
14207                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14208                 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14209                                        FirstStoreAlign, &IsFast) &&
14210                 IsFast)
14211               LastLegalVectorType = i + 1;
14212           }
14213         }
14214 
14215         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
14216         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
14217 
14218         // Check if we found a legal integer type that creates a meaningful
14219         // merge.
14220         if (NumElem < 2) {
14221           // We know that candidate stores are in order and of correct
14222           // shape. While there is no mergeable sequence from the
14223           // beginning one may start later in the sequence. The only
14224           // reason a merge of size N could have failed where another of
14225           // the same size would not have, is if the alignment has
14226           // improved or we've dropped a non-zero value. Drop as many
14227           // candidates as we can here.
14228           unsigned NumSkip = 1;
14229           while (
14230               (NumSkip < NumConsecutiveStores) &&
14231               (NumSkip < FirstZeroAfterNonZero) &&
14232               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14233             NumSkip++;
14234 
14235           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14236           NumConsecutiveStores -= NumSkip;
14237           continue;
14238         }
14239 
14240         // Check that we can merge these candidates without causing a cycle.
14241         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14242                                                       RootNode)) {
14243           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14244           NumConsecutiveStores -= NumElem;
14245           continue;
14246         }
14247 
14248         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
14249                                               UseVector, LastIntegerTrunc);
14250 
14251         // Remove merged stores for next iteration.
14252         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14253         NumConsecutiveStores -= NumElem;
14254       }
14255       continue;
14256     }
14257 
14258     // When extracting multiple vector elements, try to store them
14259     // in one vector store rather than a sequence of scalar stores.
14260     if (IsExtractVecSrc) {
14261       // Loop on Consecutive Stores on success.
14262       while (NumConsecutiveStores >= 2) {
14263         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14264         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14265         unsigned FirstStoreAlign = FirstInChain->getAlignment();
14266         unsigned NumStoresToMerge = 1;
14267         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14268           // Find a legal type for the vector store.
14269           unsigned Elts = (i + 1) * NumMemElts;
14270           EVT Ty =
14271               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14272           bool IsFast;
14273 
14274           // Break early when size is too large to be legal.
14275           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
14276             break;
14277 
14278           if (TLI.isTypeLegal(Ty) &&
14279               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14280               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14281                                      FirstStoreAlign, &IsFast) &&
14282               IsFast)
14283             NumStoresToMerge = i + 1;
14284         }
14285 
14286         // Check if we found a legal integer type creating a meaningful
14287         // merge.
14288         if (NumStoresToMerge < 2) {
14289           // We know that candidate stores are in order and of correct
14290           // shape. While there is no mergeable sequence from the
14291           // beginning one may start later in the sequence. The only
14292           // reason a merge of size N could have failed where another of
14293           // the same size would not have, is if the alignment has
14294           // improved. Drop as many candidates as we can here.
14295           unsigned NumSkip = 1;
14296           while (
14297               (NumSkip < NumConsecutiveStores) &&
14298               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14299             NumSkip++;
14300 
14301           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14302           NumConsecutiveStores -= NumSkip;
14303           continue;
14304         }
14305 
14306         // Check that we can merge these candidates without causing a cycle.
14307         if (!checkMergeStoreCandidatesForDependencies(
14308                 StoreNodes, NumStoresToMerge, RootNode)) {
14309           StoreNodes.erase(StoreNodes.begin(),
14310                            StoreNodes.begin() + NumStoresToMerge);
14311           NumConsecutiveStores -= NumStoresToMerge;
14312           continue;
14313         }
14314 
14315         RV |= MergeStoresOfConstantsOrVecElts(
14316             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
14317 
14318         StoreNodes.erase(StoreNodes.begin(),
14319                          StoreNodes.begin() + NumStoresToMerge);
14320         NumConsecutiveStores -= NumStoresToMerge;
14321       }
14322       continue;
14323     }
14324 
14325     // Below we handle the case of multiple consecutive stores that
14326     // come from multiple consecutive loads. We merge them into a single
14327     // wide load and a single wide store.
14328 
14329     // Look for load nodes which are used by the stored values.
14330     SmallVector<MemOpLink, 8> LoadNodes;
14331 
14332     // Find acceptable loads. Loads need to have the same chain (token factor),
14333     // must not be zext, volatile, indexed, and they must be consecutive.
14334     BaseIndexOffset LdBasePtr;
14335 
14336     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14337       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14338       SDValue Val = peekThroughBitcast(St->getValue());
14339       LoadSDNode *Ld = cast<LoadSDNode>(Val);
14340 
14341       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
14342       // If this is not the first ptr that we check.
14343       int64_t LdOffset = 0;
14344       if (LdBasePtr.getBase().getNode()) {
14345         // The base ptr must be the same.
14346         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
14347           break;
14348       } else {
14349         // Check that all other base pointers are the same as this one.
14350         LdBasePtr = LdPtr;
14351       }
14352 
14353       // We found a potential memory operand to merge.
14354       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
14355     }
14356 
14357     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
14358       // If we have load/store pair instructions and we only have two values,
14359       // don't bother merging.
14360       unsigned RequiredAlignment;
14361       if (LoadNodes.size() == 2 &&
14362           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
14363           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
14364         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
14365         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
14366         break;
14367       }
14368       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14369       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14370       unsigned FirstStoreAlign = FirstInChain->getAlignment();
14371       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
14372       unsigned FirstLoadAS = FirstLoad->getAddressSpace();
14373       unsigned FirstLoadAlign = FirstLoad->getAlignment();
14374 
14375       // Scan the memory operations on the chain and find the first
14376       // non-consecutive load memory address. These variables hold the index in
14377       // the store node array.
14378 
14379       unsigned LastConsecutiveLoad = 1;
14380 
14381       // This variable refers to the size and not index in the array.
14382       unsigned LastLegalVectorType = 1;
14383       unsigned LastLegalIntegerType = 1;
14384       bool isDereferenceable = true;
14385       bool DoIntegerTruncate = false;
14386       StartAddress = LoadNodes[0].OffsetFromBase;
14387       SDValue FirstChain = FirstLoad->getChain();
14388       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
14389         // All loads must share the same chain.
14390         if (LoadNodes[i].MemNode->getChain() != FirstChain)
14391           break;
14392 
14393         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
14394         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14395           break;
14396         LastConsecutiveLoad = i;
14397 
14398         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
14399           isDereferenceable = false;
14400 
14401         // Find a legal type for the vector store.
14402         unsigned Elts = (i + 1) * NumMemElts;
14403         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14404 
14405         // Break early when size is too large to be legal.
14406         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14407           break;
14408 
14409         bool IsFastSt, IsFastLd;
14410         if (TLI.isTypeLegal(StoreTy) &&
14411             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14412             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14413                                    FirstStoreAlign, &IsFastSt) &&
14414             IsFastSt &&
14415             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14416                                    FirstLoadAlign, &IsFastLd) &&
14417             IsFastLd) {
14418           LastLegalVectorType = i + 1;
14419         }
14420 
14421         // Find a legal type for the integer store.
14422         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14423         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14424         if (TLI.isTypeLegal(StoreTy) &&
14425             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14426             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14427                                    FirstStoreAlign, &IsFastSt) &&
14428             IsFastSt &&
14429             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14430                                    FirstLoadAlign, &IsFastLd) &&
14431             IsFastLd) {
14432           LastLegalIntegerType = i + 1;
14433           DoIntegerTruncate = false;
14434           // Or check whether a truncstore and extload is legal.
14435         } else if (TLI.getTypeAction(Context, StoreTy) ==
14436                    TargetLowering::TypePromoteInteger) {
14437           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
14438           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14439               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14440               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
14441                                  StoreTy) &&
14442               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
14443                                  StoreTy) &&
14444               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
14445               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14446                                      FirstStoreAlign, &IsFastSt) &&
14447               IsFastSt &&
14448               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14449                                      FirstLoadAlign, &IsFastLd) &&
14450               IsFastLd) {
14451             LastLegalIntegerType = i + 1;
14452             DoIntegerTruncate = true;
14453           }
14454         }
14455       }
14456 
14457       // Only use vector types if the vector type is larger than the integer
14458       // type. If they are the same, use integers.
14459       bool UseVectorTy =
14460           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
14461       unsigned LastLegalType =
14462           std::max(LastLegalVectorType, LastLegalIntegerType);
14463 
14464       // We add +1 here because the LastXXX variables refer to location while
14465       // the NumElem refers to array/index size.
14466       unsigned NumElem =
14467           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
14468       NumElem = std::min(LastLegalType, NumElem);
14469 
14470       if (NumElem < 2) {
14471         // We know that candidate stores are in order and of correct
14472         // shape. While there is no mergeable sequence from the
14473         // beginning one may start later in the sequence. The only
14474         // reason a merge of size N could have failed where another of
14475         // the same size would not have is if the alignment or either
14476         // the load or store has improved. Drop as many candidates as we
14477         // can here.
14478         unsigned NumSkip = 1;
14479         while ((NumSkip < LoadNodes.size()) &&
14480                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
14481                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14482           NumSkip++;
14483         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14484         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
14485         NumConsecutiveStores -= NumSkip;
14486         continue;
14487       }
14488 
14489       // Check that we can merge these candidates without causing a cycle.
14490       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14491                                                     RootNode)) {
14492         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14493         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14494         NumConsecutiveStores -= NumElem;
14495         continue;
14496       }
14497 
14498       // Find if it is better to use vectors or integers to load and store
14499       // to memory.
14500       EVT JointMemOpVT;
14501       if (UseVectorTy) {
14502         // Find a legal type for the vector store.
14503         unsigned Elts = NumElem * NumMemElts;
14504         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14505       } else {
14506         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
14507         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
14508       }
14509 
14510       SDLoc LoadDL(LoadNodes[0].MemNode);
14511       SDLoc StoreDL(StoreNodes[0].MemNode);
14512 
14513       // The merged loads are required to have the same incoming chain, so
14514       // using the first's chain is acceptable.
14515 
14516       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
14517       AddToWorklist(NewStoreChain.getNode());
14518 
14519       MachineMemOperand::Flags MMOFlags =
14520           isDereferenceable ? MachineMemOperand::MODereferenceable
14521                             : MachineMemOperand::MONone;
14522 
14523       SDValue NewLoad, NewStore;
14524       if (UseVectorTy || !DoIntegerTruncate) {
14525         NewLoad =
14526             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
14527                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14528                         FirstLoadAlign, MMOFlags);
14529         NewStore = DAG.getStore(
14530             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
14531             FirstInChain->getPointerInfo(), FirstStoreAlign);
14532       } else { // This must be the truncstore/extload case
14533         EVT ExtendedTy =
14534             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
14535         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
14536                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
14537                                  FirstLoad->getPointerInfo(), JointMemOpVT,
14538                                  FirstLoadAlign, MMOFlags);
14539         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
14540                                      FirstInChain->getBasePtr(),
14541                                      FirstInChain->getPointerInfo(),
14542                                      JointMemOpVT, FirstInChain->getAlignment(),
14543                                      FirstInChain->getMemOperand()->getFlags());
14544       }
14545 
14546       // Transfer chain users from old loads to the new load.
14547       for (unsigned i = 0; i < NumElem; ++i) {
14548         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
14549         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
14550                                       SDValue(NewLoad.getNode(), 1));
14551       }
14552 
14553       // Replace the all stores with the new store. Recursively remove
14554       // corresponding value if its no longer used.
14555       for (unsigned i = 0; i < NumElem; ++i) {
14556         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
14557         CombineTo(StoreNodes[i].MemNode, NewStore);
14558         if (Val.getNode()->use_empty())
14559           recursivelyDeleteUnusedNodes(Val.getNode());
14560       }
14561 
14562       RV = true;
14563       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14564       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14565       NumConsecutiveStores -= NumElem;
14566     }
14567   }
14568   return RV;
14569 }
14570 
14571 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
14572   SDLoc SL(ST);
14573   SDValue ReplStore;
14574 
14575   // Replace the chain to avoid dependency.
14576   if (ST->isTruncatingStore()) {
14577     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
14578                                   ST->getBasePtr(), ST->getMemoryVT(),
14579                                   ST->getMemOperand());
14580   } else {
14581     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
14582                              ST->getMemOperand());
14583   }
14584 
14585   // Create token to keep both nodes around.
14586   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
14587                               MVT::Other, ST->getChain(), ReplStore);
14588 
14589   // Make sure the new and old chains are cleaned up.
14590   AddToWorklist(Token.getNode());
14591 
14592   // Don't add users to work list.
14593   return CombineTo(ST, Token, false);
14594 }
14595 
14596 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
14597   SDValue Value = ST->getValue();
14598   if (Value.getOpcode() == ISD::TargetConstantFP)
14599     return SDValue();
14600 
14601   SDLoc DL(ST);
14602 
14603   SDValue Chain = ST->getChain();
14604   SDValue Ptr = ST->getBasePtr();
14605 
14606   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
14607 
14608   // NOTE: If the original store is volatile, this transform must not increase
14609   // the number of stores.  For example, on x86-32 an f64 can be stored in one
14610   // processor operation but an i64 (which is not legal) requires two.  So the
14611   // transform should not be done in this case.
14612 
14613   SDValue Tmp;
14614   switch (CFP->getSimpleValueType(0).SimpleTy) {
14615   default:
14616     llvm_unreachable("Unknown FP type");
14617   case MVT::f16:    // We don't do this for these yet.
14618   case MVT::f80:
14619   case MVT::f128:
14620   case MVT::ppcf128:
14621     return SDValue();
14622   case MVT::f32:
14623     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
14624         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
14625       ;
14626       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
14627                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
14628                             MVT::i32);
14629       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
14630     }
14631 
14632     return SDValue();
14633   case MVT::f64:
14634     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
14635          !ST->isVolatile()) ||
14636         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
14637       ;
14638       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
14639                             getZExtValue(), SDLoc(CFP), MVT::i64);
14640       return DAG.getStore(Chain, DL, Tmp,
14641                           Ptr, ST->getMemOperand());
14642     }
14643 
14644     if (!ST->isVolatile() &&
14645         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
14646       // Many FP stores are not made apparent until after legalize, e.g. for
14647       // argument passing.  Since this is so common, custom legalize the
14648       // 64-bit integer store into two 32-bit stores.
14649       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
14650       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
14651       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
14652       if (DAG.getDataLayout().isBigEndian())
14653         std::swap(Lo, Hi);
14654 
14655       unsigned Alignment = ST->getAlignment();
14656       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
14657       AAMDNodes AAInfo = ST->getAAInfo();
14658 
14659       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
14660                                  ST->getAlignment(), MMOFlags, AAInfo);
14661       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
14662                         DAG.getConstant(4, DL, Ptr.getValueType()));
14663       Alignment = MinAlign(Alignment, 4U);
14664       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
14665                                  ST->getPointerInfo().getWithOffset(4),
14666                                  Alignment, MMOFlags, AAInfo);
14667       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
14668                          St0, St1);
14669     }
14670 
14671     return SDValue();
14672   }
14673 }
14674 
/// Main combine entry point for ISD::STORE. Applies a fixed sequence of
/// simplifications: bitcast forwarding, undef-store removal, alignment
/// inference, FP<->int load/store pairing, chain improvement, truncstore
/// narrowing, dead/duplicate store elimination, FP_ROUND/TRUNCATE folding,
/// consecutive-store merging, indexed-store formation, FP-constant
/// replacement, and merged-value splitting. Returns the replacement value
/// or SDValue() if no combine applied. The order of these steps matters;
/// several later comments explain why.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // Only do this if storing SVT is legal (or we're pre-legalization and
    // the store is non-volatile), the target considers the bitcast-removal
    // beneficial, and the access at the original alignment is fast.
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                            ST->getPointerInfo(), OrigAlign,
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N as we are only refining the alignment
        assert(NewStore.getNode() == N);
        (void)NewStore;
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // Re-read the chain; it may have been updated above.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter = DAG.GetDemandedBits(
        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                    ST->getMemoryVT().getScalarSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(
            Value,
            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                 ST->getMemoryVT().getScalarSizeInBits()))) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // Look at the store immediately before us on the chain.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
        ST->getMemoryVT() == ST1->getMemoryVT()) {
      // If this is a store followed by a store with the same value to the same
      // location, then the store is dead/noop.
      if (ST1->getValue() == Value) {
        // The store is dead, remove it.
        return Chain;
      }

      // If this store fully overwrites the immediately preceding store to
      // the same location, and no other node is chained on that store, the
      // earlier store can be dropped. Do not remove stores to undef as they
      // may be used as data sinks.
      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        // ST1 is fully overwritten and can be elided. Combine with its chain
        // value.
        CombineTo(ST1, ST1->getChain());
        return SDValue();
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Always perform this optimization before types are legal. If the target
  // prefers, also try this after legalization to catch stores that were created
  // by intrinsics or other nodes.
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
    while (true) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  // Split a wide store of a bit-packed value pair into two narrower stores.
  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  // Finally, try to narrow the width of the load-op-store sequence.
  return ReduceLoadOpStoreWidth(N);
}
14850 
14851 /// For the instruction sequence of store below, F and I values
14852 /// are bundled together as an i64 value before being stored into memory.
14853 /// Sometimes it is more efficent to generate separate stores for F and I,
14854 /// which can remove the bitwise instructions or sink them to colder places.
14855 ///
14856 ///   (store (or (zext (bitcast F to i32) to i64),
14857 ///              (shl (zext I to i64), 32)), addr)  -->
14858 ///   (store F, addr) and (store I, addr+4)
14859 ///
14860 /// Similarly, splitting for other merged store can also be beneficial, like:
14861 /// For pair of {i32, i32}, i64 store --> two i32 stores.
14862 /// For pair of {i32, i16}, i64 store --> two i32 stores.
14863 /// For pair of {i16, i16}, i32 store --> two i16 stores.
14864 /// For pair of {i16, i8},  i32 store --> two i16 stores.
14865 /// For pair of {i8, i8},   i16 store --> two i8 stores.
14866 ///
14867 /// We allow each target to determine specifically which kind of splitting is
14868 /// supported.
14869 ///
14870 /// The store patterns are commonly seen from the simple code snippet below
14871 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
14872 ///   void goo(const std::pair<int, float> &);
14873 ///   hoo() {
14874 ///     ...
14875 ///     goo(std::make_pair(tmp, ftmp));
14876 ///     ...
14877 ///   }
14878 ///
14879 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
14880   if (OptLevel == CodeGenOpt::None)
14881     return SDValue();
14882 
14883   SDValue Val = ST->getValue();
14884   SDLoc DL(ST);
14885 
14886   // Match OR operand.
14887   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
14888     return SDValue();
14889 
14890   // Match SHL operand and get Lower and Higher parts of Val.
14891   SDValue Op1 = Val.getOperand(0);
14892   SDValue Op2 = Val.getOperand(1);
14893   SDValue Lo, Hi;
14894   if (Op1.getOpcode() != ISD::SHL) {
14895     std::swap(Op1, Op2);
14896     if (Op1.getOpcode() != ISD::SHL)
14897       return SDValue();
14898   }
14899   Lo = Op2;
14900   Hi = Op1.getOperand(0);
14901   if (!Op1.hasOneUse())
14902     return SDValue();
14903 
14904   // Match shift amount to HalfValBitSize.
14905   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
14906   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
14907   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
14908     return SDValue();
14909 
14910   // Lo and Hi are zero-extended from int with size less equal than 32
14911   // to i64.
14912   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
14913       !Lo.getOperand(0).getValueType().isScalarInteger() ||
14914       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
14915       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
14916       !Hi.getOperand(0).getValueType().isScalarInteger() ||
14917       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
14918     return SDValue();
14919 
14920   // Use the EVT of low and high parts before bitcast as the input
14921   // of target query.
14922   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
14923                   ? Lo.getOperand(0).getValueType()
14924                   : Lo.getValueType();
14925   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
14926                    ? Hi.getOperand(0).getValueType()
14927                    : Hi.getValueType();
14928   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
14929     return SDValue();
14930 
14931   // Start to split store.
14932   unsigned Alignment = ST->getAlignment();
14933   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
14934   AAMDNodes AAInfo = ST->getAAInfo();
14935 
14936   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
14937   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
14938   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
14939   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
14940 
14941   SDValue Chain = ST->getChain();
14942   SDValue Ptr = ST->getBasePtr();
14943   // Lower value store.
14944   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
14945                              ST->getAlignment(), MMOFlags, AAInfo);
14946   Ptr =
14947       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
14948                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
14949   // Higher value store.
14950   SDValue St1 =
14951       DAG.getStore(St0, DL, Hi, Ptr,
14952                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
14953                    Alignment / 2, MMOFlags, AAInfo);
14954   return St1;
14955 }
14956 
14957 /// Convert a disguised subvector insertion into a shuffle:
14958 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
14959 /// bitcast(shuffle (bitcast V), (extended X), Mask)
14960 /// Note: We do not use an insert_subvector node because that requires a legal
14961 /// subvector type.
14962 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
14963   SDValue InsertVal = N->getOperand(1);
14964   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
14965       !InsertVal.getOperand(0).getValueType().isVector())
14966     return SDValue();
14967 
14968   SDValue SubVec = InsertVal.getOperand(0);
14969   SDValue DestVec = N->getOperand(0);
14970   EVT SubVecVT = SubVec.getValueType();
14971   EVT VT = DestVec.getValueType();
14972   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
14973   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
14974   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
14975 
14976   // Step 1: Create a shuffle mask that implements this insert operation. The
14977   // vector that we are inserting into will be operand 0 of the shuffle, so
14978   // those elements are just 'i'. The inserted subvector is in the first
14979   // positions of operand 1 of the shuffle. Example:
14980   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
14981   SmallVector<int, 16> Mask(NumMaskVals);
14982   for (unsigned i = 0; i != NumMaskVals; ++i) {
14983     if (i / NumSrcElts == InsIndex)
14984       Mask[i] = (i % NumSrcElts) + NumMaskVals;
14985     else
14986       Mask[i] = i;
14987   }
14988 
14989   // Bail out if the target can not handle the shuffle we want to create.
14990   EVT SubVecEltVT = SubVecVT.getVectorElementType();
14991   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
14992   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
14993     return SDValue();
14994 
14995   // Step 2: Create a wide vector from the inserted source vector by appending
14996   // undefined elements. This is the same size as our destination vector.
14997   SDLoc DL(N);
14998   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
14999   ConcatOps[0] = SubVec;
15000   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
15001 
15002   // Step 3: Shuffle in the padded subvector.
15003   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
15004   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
15005   AddToWorklist(PaddedSubV.getNode());
15006   AddToWorklist(DestVecBC.getNode());
15007   AddToWorklist(Shuf.getNode());
15008   return DAG.getBitcast(VT, Shuf);
15009 }
15010 
/// Combine ISD::INSERT_VECTOR_ELT: drop undef/redundant insertions, turn a
/// subvector-style insertion into a shuffle, canonicalize chained insertions
/// by descending index, and fold insertions into BUILD_VECTOR (or UNDEF)
/// operands. Returns the replacement value or SDValue() if nothing applied.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.isUndef())
    return InVec;

  EVT VT = InVec.getValueType();

  // Remove redundant insertions:
  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
    return InVec;

  // We must know which element is being inserted for folds below here.
  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IndexC)
    return SDValue();
  unsigned Elt = IndexC->getZExtValue();

  // Recognize a disguised subvector insertion and turn it into a shuffle.
  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
    return Shuf;

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt = InVec.getConstantOperandVal(2);
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.isUndef()) {
    // An UNDEF input acts as a BUILD_VECTOR of all-undef elements.
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
  }

  // Return the new vector
  return DAG.getBuildVector(VT, DL, Ops);
}
15089 
/// Replace (extract_vector_elt (load InVec), EltNo) with a narrow scalar
/// load of only the addressed element. \p EVE is the extract node, \p
/// InVecVT the loaded vector type, and \p OriginalLoad the vector load being
/// narrowed. Returns SDValue(EVE, 0) after replacing all uses, or SDValue()
/// if the narrowing is not legal/profitable. The caller must ensure the
/// extract is effectively the only user of the load's value.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  assert(!OriginalLoad->isVolatile());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  // The narrow load will use the element type's ABI alignment.
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Give up if the narrow load would be over-aligned relative to the
  // original, or if element-typed loads aren't handled by the target.
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
    ISD::NON_EXTLOAD : ISD::EXTLOAD;
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
    return SDValue();

  Align = NewAlign;

  // Compute the address of the selected element: base + EltNo * elt size.
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: fold the byte offset and refine the pointer info.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: emit the multiply; keep the original pointer info
    // since the exact offset is unknown.
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
                          Align, OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Same-or-narrower result: plain load, then truncate or bitcast to
    // the extract's result type as needed.
    Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
                       MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
                       OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  // Replace both the extract's value and the old load's chain result.
  WorklistRemover DeadNodes(*this);
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}
15172 
// Combine an EXTRACT_VECTOR_ELT node: fold extracts of scalar_to_vector,
// build_vector, bitcast, insert_vector_elt and vector_shuffle sources, and
// finally try to narrow an extract-of-load into a scalar load.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  // Extracting from an undef vector yields undef.
  if (InVec.isUndef())
    return DAG.getUNDEF(NVT);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  // A constant extraction index enables the index-specific folds below.
  SDValue EltNo = N->getOperand(1);
  ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

  // extract_vector_elt of out-of-bounds element -> UNDEF
  if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
    return DAG.getUNDEF(NVT);

  // extract_vector_elt (build_vector x, y), 1 -> y
  if (ConstEltNo &&
      InVec.getOpcode() == ISD::BUILD_VECTOR &&
      TLI.isTypeLegal(VT) &&
      (InVec.hasOneUse() ||
       TLI.aggressivelyPreferBuildVectorSources(VT))) {
    SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (NVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
  // EltTrunc is the element that holds the low bits of the wide scalar:
  // element 0 on little-endian, the last element on big-endian.
  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
      ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
    SDValue BCSrc = InVec.getOperand(0);
    if (BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
  }

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      EltNo == InVec.getOperand(2)) {
    SDValue Elt = InVec.getOperand(1);
    return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
  }

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
    // Mask values < NumElem index the first operand, >= NumElem the second.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations ||
        // FIXME: Should really be just isOperationLegalOrCustom.
        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                         DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
    }
  }

  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
  // simplify it based on the (valid) extraction indices.
  if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
        return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
               Use->getOperand(0) == InVec &&
               isa<ConstantSDNode>(Use->getOperand(1));
      })) {
    // Collect the union of in-range extracted lanes across all users; any
    // other lane of InVec is dead and may be simplified away.
    APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
    for (SDNode *Use : InVec->uses()) {
      auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
      if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
        DemandedElts.setBit(CstElt->getZExtValue());
    }
    if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
      return SDValue(N, 0);
  }

  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  // Look through a bitcast so the load-narrowing folds below can see the
  // underlying load; remember if the bitcast changed the lane count, which
  // makes shuffle masks unreliable.
  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  // The hasPredecessor check guards against a cycle: the index must not
  // depend on the load we are about to replace.
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
      if (!OrigLoad->isVolatile()) {
        return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                             OrigLoad);
      }
    }
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Rebase the index into the selected shuffle operand.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}
15409 
15410 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
15411 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
15412   // We perform this optimization post type-legalization because
15413   // the type-legalizer often scalarizes integer-promoted vectors.
15414   // Performing this optimization before may create bit-casts which
15415   // will be type-legalized to complex code sequences.
15416   // We perform this optimization only before the operation legalizer because we
15417   // may introduce illegal operations.
15418   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
15419     return SDValue();
15420 
15421   unsigned NumInScalars = N->getNumOperands();
15422   SDLoc DL(N);
15423   EVT VT = N->getValueType(0);
15424 
15425   // Check to see if this is a BUILD_VECTOR of a bunch of values
15426   // which come from any_extend or zero_extend nodes. If so, we can create
15427   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
15428   // optimizations. We do not handle sign-extend because we can't fill the sign
15429   // using shuffles.
15430   EVT SourceType = MVT::Other;
15431   bool AllAnyExt = true;
15432 
15433   for (unsigned i = 0; i != NumInScalars; ++i) {
15434     SDValue In = N->getOperand(i);
15435     // Ignore undef inputs.
15436     if (In.isUndef()) continue;
15437 
15438     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
15439     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
15440 
15441     // Abort if the element is not an extension.
15442     if (!ZeroExt && !AnyExt) {
15443       SourceType = MVT::Other;
15444       break;
15445     }
15446 
15447     // The input is a ZeroExt or AnyExt. Check the original type.
15448     EVT InTy = In.getOperand(0).getValueType();
15449 
15450     // Check that all of the widened source types are the same.
15451     if (SourceType == MVT::Other)
15452       // First time.
15453       SourceType = InTy;
15454     else if (InTy != SourceType) {
15455       // Multiple income types. Abort.
15456       SourceType = MVT::Other;
15457       break;
15458     }
15459 
15460     // Check if all of the extends are ANY_EXTENDs.
15461     AllAnyExt &= AnyExt;
15462   }
15463 
15464   // In order to have valid types, all of the inputs must be extended from the
15465   // same source type and all of the inputs must be any or zero extend.
15466   // Scalar sizes must be a power of two.
15467   EVT OutScalarTy = VT.getScalarType();
15468   bool ValidTypes = SourceType != MVT::Other &&
15469                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
15470                  isPowerOf2_32(SourceType.getSizeInBits());
15471 
15472   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
15473   // turn into a single shuffle instruction.
15474   if (!ValidTypes)
15475     return SDValue();
15476 
15477   bool isLE = DAG.getDataLayout().isLittleEndian();
15478   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
15479   assert(ElemRatio > 1 && "Invalid element size ratio");
15480   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
15481                                DAG.getConstant(0, DL, SourceType);
15482 
15483   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
15484   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
15485 
15486   // Populate the new build_vector
15487   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
15488     SDValue Cast = N->getOperand(i);
15489     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
15490             Cast.getOpcode() == ISD::ZERO_EXTEND ||
15491             Cast.isUndef()) && "Invalid cast opcode");
15492     SDValue In;
15493     if (Cast.isUndef())
15494       In = DAG.getUNDEF(SourceType);
15495     else
15496       In = Cast->getOperand(0);
15497     unsigned Index = isLE ? (i * ElemRatio) :
15498                             (i * ElemRatio + (ElemRatio - 1));
15499 
15500     assert(Index < Ops.size() && "Invalid index");
15501     Ops[Index] = In;
15502   }
15503 
15504   // The type of the new BUILD_VECTOR node.
15505   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
15506   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
15507          "Invalid vector size");
15508   // Check if the new vector type is legal.
15509   if (!isTypeLegal(VecVT) ||
15510       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
15511        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
15512     return SDValue();
15513 
15514   // Make the new BUILD_VECTOR.
15515   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
15516 
15517   // The new BUILD_VECTOR node has the potential to be further optimized.
15518   AddToWorklist(BV.getNode());
15519   // Bitcast to the desired type.
15520   return DAG.getBitcast(VT, BV);
15521 }
15522 
15523 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
15524   EVT VT = N->getValueType(0);
15525 
15526   unsigned NumInScalars = N->getNumOperands();
15527   SDLoc DL(N);
15528 
15529   EVT SrcVT = MVT::Other;
15530   unsigned Opcode = ISD::DELETED_NODE;
15531   unsigned NumDefs = 0;
15532 
15533   for (unsigned i = 0; i != NumInScalars; ++i) {
15534     SDValue In = N->getOperand(i);
15535     unsigned Opc = In.getOpcode();
15536 
15537     if (Opc == ISD::UNDEF)
15538       continue;
15539 
15540     // If all scalar values are floats and converted from integers.
15541     if (Opcode == ISD::DELETED_NODE &&
15542         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
15543       Opcode = Opc;
15544     }
15545 
15546     if (Opc != Opcode)
15547       return SDValue();
15548 
15549     EVT InVT = In.getOperand(0).getValueType();
15550 
15551     // If all scalar values are typed differently, bail out. It's chosen to
15552     // simplify BUILD_VECTOR of integer types.
15553     if (SrcVT == MVT::Other)
15554       SrcVT = InVT;
15555     if (SrcVT != InVT)
15556       return SDValue();
15557     NumDefs++;
15558   }
15559 
15560   // If the vector has just one element defined, it's not worth to fold it into
15561   // a vectorized one.
15562   if (NumDefs < 2)
15563     return SDValue();
15564 
15565   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
15566          && "Should only handle conversion from integer to float.");
15567   assert(SrcVT != MVT::Other && "Cannot determine source type!");
15568 
15569   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
15570 
15571   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
15572     return SDValue();
15573 
15574   // Just because the floating-point vector type is legal does not necessarily
15575   // mean that the corresponding integer vector type is.
15576   if (!isTypeLegal(NVT))
15577     return SDValue();
15578 
15579   SmallVector<SDValue, 8> Opnds;
15580   for (unsigned i = 0; i != NumInScalars; ++i) {
15581     SDValue In = N->getOperand(i);
15582 
15583     if (In.isUndef())
15584       Opnds.push_back(DAG.getUNDEF(SrcVT));
15585     else
15586       Opnds.push_back(In.getOperand(0));
15587   }
15588   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
15589   AddToWorklist(BV.getNode());
15590 
15591   return DAG.getNode(Opcode, DL, VT, BV);
15592 }
15593 
// Build one VECTOR_SHUFFLE that realizes the elements of BUILD_VECTOR node N
// that come from input vectors LeftIdx and LeftIdx+1 (per VectorMask), after
// coercing VecIn1/VecIn2 to types compatible with N's result type. Returns a
// null SDValue when the size mismatches cannot be reconciled.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  // With no second input, treat it as having the first input's type.
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned Vec2Offset = 0;
  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // In case both the input vectors are extracted from same base
  // vector we do not need extra addend (Vec2Offset) while
  // computing shuffle mask.
  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
    Vec2Offset = InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      // The concat covers both inputs, so the second input is gone.
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      // VecIn1 is twice as wide as the output.
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        // Shuffle at the (wider) input width; the result is extracted below.
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // VecIn2 is half the output width: widen it with an undef upper half.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // VectorMask[i] <= 0 means undef (-1) or the zero vector (0); neither
    // comes from these two inputs.
    if (VectorMask[i] <= 0)
      continue;

    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  // If we shuffled at a wider width, extract the output-sized low part.
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
15714 
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is reserved for the zero vector, so real inputs start at index 1.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
      return SDValue();

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  if (VecIn.size() < 2)
    return SDValue();

  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    // Record each element's extract index and track the largest one.
    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    // Split Vec into two power-of-two halves when the accessed range is
    // much wider than the result, so the shuffle operates on smaller types.
    NearestPow2 = PowerOf2Ceil(MaxIndex);
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT)) {
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(SplitSize, DL, IdxTy));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(0, DL, IdxTy));
        // Replace the single wide input with the two halves.
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);

        // Re-point each element at whichever half now contains it.
        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    // Input vectors are numbered from 1 (0 is the zero vector), so the
    // pair for iteration In is (2*In+1, 2*In+2).
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      // The zero vector, if used, is the last entry in Shuffles.
      Vec = Shuffles.size() - 1;
    else
      // Input pair (2k+1, 2k+2) was combined into shuffle k.
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    // Keep each level of the tree even by padding with an undef vector.
    if (CurSize % 2) {
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      // Blend Left and Right: each lane takes its value from whichever of
      // the two shuffles currently owns it, then both lanes map to In.
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  // The root of the blend tree is the final result.
  return Shuffles[0];
}
15923 
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
15926 // TODO: Support sign extend or any extend?
15927 // TODO: Allow undef elements?
15928 // TODO: Don't require the extracts to start at element 0.
15929 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
15930   if (LegalOperations)
15931     return SDValue();
15932 
15933   EVT VT = N->getValueType(0);
15934 
15935   SDValue Op0 = N->getOperand(0);
15936   auto checkElem = [&](SDValue Op) -> int64_t {
15937     if (Op.getOpcode() == ISD::ZERO_EXTEND &&
15938         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15939         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
15940       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
15941         return C->getZExtValue();
15942     return -1;
15943   };
15944 
15945   // Make sure the first element matches
15946   // (zext (extract_vector_elt X, C))
15947   int64_t Offset = checkElem(Op0);
15948   if (Offset < 0)
15949     return SDValue();
15950 
15951   unsigned NumElems = N->getNumOperands();
15952   SDValue In = Op0.getOperand(0).getOperand(0);
15953   EVT InSVT = In.getValueType().getScalarType();
15954   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
15955 
15956   // Don't create an illegal input type after type legalization.
15957   if (LegalTypes && !TLI.isTypeLegal(InVT))
15958     return SDValue();
15959 
15960   // Ensure all the elements come from the same vector and are adjacent.
15961   for (unsigned i = 1; i != NumElems; ++i) {
15962     if ((Offset + i) != checkElem(N->getOperand(i)))
15963       return SDValue();
15964   }
15965 
15966   SDLoc DL(N);
15967   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
15968                    Op0.getOperand(0).getOperand(1));
15969   return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
15970 }
15971 
/// Combine a BUILD_VECTOR node. Tries, in order: folding all-undef operands
/// to UNDEF, a splat of a bitcast vector to a bitcast of concat_vectors,
/// adjacent extract_vector_elts from one source to an identity value or an
/// EXTRACT_SUBVECTOR, and finally the zext/ext/convert/shuffle reductions
/// implemented in the helper functions.
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // If this is a splat of a bitcast from another vector, change to a
  // concat_vector.
  // For example:
  //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
  //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
  //
  // If X is a build_vector itself, the concat can become a larger build_vector.
  // TODO: Maybe this is useful for non-splat too?
  if (!LegalOperations) {
    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      Splat = peekThroughBitcast(Splat);
      EVT SrcVT = Splat.getValueType();
      if (SrcVT.isVector()) {
        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
                                     SrcVT.getVectorElementType(), NumElts);
        // Only form the wider vector type if it is legal (or we are still
        // before type legalization).
        if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
          SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
          SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
                                       NewVT, Ops);
          return DAG.getBitcast(VT, Concat);
        }
      }
    }
  }

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    // Returns the constant extract index if Op is
    // (extract_vector_elt Src, C) with the same Src as operand 0;
    // otherwise (uint64_t)-1 as a "no match" sentinel.
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    // Offset is the index extracted by operand 0; every operand i must
    // extract index Offset + i, otherwise Offset is reset to the -1 sentinel.
    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    // Extracting from element 0 of a same-typed vector is the identity.
    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  // Fall back to the more specialized BUILD_VECTOR reductions.
  if (SDValue V = convertBuildVecZextToZext(N))
    return V;

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}
16048 
/// Fold a CONCAT_VECTORS whose operands are all bitcasts of scalars (or
/// undef) into a bitcast of a single wide BUILD_VECTOR of those scalars:
///   concat (bitcast s0), (bitcast s1), ... -> bitcast (build_vector s0, ...)
/// Bails out when the operand vector type is already legal, or when any
/// operand is neither a scalar bitcast nor undef.
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OpVT = N->getOperand(0).getValueType();

  // If the operands are legal vectors, leave them alone.
  if (TLI.isTypeLegal(OpVT))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SmallVector<SDValue, 8> Ops;

  // Start with an integer scalar type of the operand's width; this may be
  // switched to a floating point type below if any FP scalar is seen.
  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);

  // Keep track of what we encounter.
  bool AnyInteger = false;
  bool AnyFP = false;
  for (const SDValue &Op : N->ops()) {
    if (ISD::BITCAST == Op.getOpcode() &&
        !Op.getOperand(0).getValueType().isVector())
      Ops.push_back(Op.getOperand(0));
    else if (ISD::UNDEF == Op.getOpcode())
      Ops.push_back(ScalarUndef);
    else
      return SDValue();

    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out, it could be something weird like x86mmx.
    EVT LastOpVT = Ops.back().getValueType();
    if (LastOpVT.isFloatingPoint())
      AnyFP = true;
    else if (LastOpVT.isInteger())
      AnyInteger = true;
    else
      return SDValue();
  }

  // If any of the operands is a floating point scalar bitcast to a vector,
  // use floating point types throughout, and bitcast everything.
  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
  if (AnyFP) {
    SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
    ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
    if (AnyInteger) {
      for (SDValue &Op : Ops) {
        if (Op.getValueType() == SVT)
          continue;
        if (Op.isUndef())
          Op = ScalarUndef;
        else
          Op = DAG.getBitcast(SVT, Op);
      }
    }
  }

  // Build one wide vector of SVT scalars covering the result's bit width,
  // then bitcast it back to the original concat result type.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
                               VT.getSizeInBits() / SVT.getSizeInBits());
  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
16109 
16110 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
16111 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
16112 // most two distinct vectors the same size as the result, attempt to turn this
16113 // into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 track the (at most two) distinct source vectors of the shuffle
  // we are assembling; they start as undef until claimed below.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    // Peek through any bitcast.
    Op = peekThroughBitcast(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();

    // Peek through any bitcast.
    ExtVec = peekThroughBitcast(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    // The subvector index must be a constant to build a fixed shuffle mask.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Only lower to a shuffle the target considers legal.
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}
16190 
/// Combine a CONCAT_VECTORS node. Handles, in order: a single-operand
/// identity, all-undef operands, concat of one scalar bitcast with undefs,
/// merging BUILD_VECTOR/UNDEF operands into a single BUILD_VECTOR, the
/// scalar/extract helpers above, and finally recognizing a concat of
/// extract_subvectors that reassembles one source vector unchanged.
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0).getValueType().isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the vector size is not a multiple of the scalar size.
      if (VT.getSizeInBits() % SclTy.getSizeInBits())
        return SDValue();

      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0).getValueType();
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // An UNDEF operand expands to NumElts undef scalars.
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    // Each operand must extract the subvector that lives at its own
    // position in the concatenation.
    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
16348 
/// If we are extracting a subvector produced by a wide binary operator with
/// at least one operand that was the result of a vector concatenation, then try
16351 /// to use the narrow vector operands directly to avoid the concatenation and
16352 /// extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
  // some of these bailouts with other transforms.

  // The extract index must be a constant, so we can map it to a concat operand.
  auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!ExtractIndex)
    return SDValue();

  // Only handle the case where we are doubling and then halving. A larger ratio
  // may require more than two narrow binops to replace the wide binop.
  EVT VT = Extract->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
         "Extract index is not a multiple of the vector length.");
  if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
    return SDValue();

  // We are looking for an optionally bitcasted wide vector binary operator
  // feeding an extract subvector.
  SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));

  // TODO: The motivating case for this transform is an x86 AVX1 target. That
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
  // flavors, but no other 256-bit integer support. This could be extended to
  // handle any binop, but that may require fixing/adding other folds to avoid
  // codegen regressions.
  unsigned BOpcode = BinOp.getOpcode();
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
    return SDValue();

  // The binop must be a vector type, so we can chop it in half.
  EVT WideBVT = BinOp.getValueType();
  if (!WideBVT.isVector())
    return SDValue();

  // Bail out if the target does not support a narrower version of the binop.
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideBVT.getVectorNumElements() / 2);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
    return SDValue();

  // Peek through bitcasts of the binary operator operands if needed.
  SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
  SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));

  // We need at least one concatenation operation of a binop operand to make
  // this transform worthwhile. The concat must double the input vector sizes.
  // TODO: Should we also handle INSERT_SUBVECTOR patterns?
  bool ConcatL =
      LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
  bool ConcatR =
      RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
  if (!ConcatL && !ConcatR)
    return SDValue();

  // If one of the binop operands was not the result of a concat, we must
  // extract a half-sized operand for our new narrow binop. We can't just reuse
  // the original extract index operand because we may have bitcasted.
  // ConcatOpNum selects which half (concat operand) the extract reads from.
  unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
  EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
  SDLoc DL(Extract);

  // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
  // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
  // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
  SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                    BinOp.getOperand(0),
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));

  SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                    BinOp.getOperand(1),
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));

  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
  return DAG.getBitcast(VT, NarrowBinOp);
}
16434 
16435 /// If we are extracting a subvector from a wide vector load, convert to a
16436 /// narrow load to eliminate the extraction:
16437 /// (extract_subvector (load wide vector)) --> (load narrow vector)
16438 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
16439   // TODO: Add support for big-endian. The offset calculation must be adjusted.
16440   if (DAG.getDataLayout().isBigEndian())
16441     return SDValue();
16442 
16443   // TODO: The one-use check is overly conservative. Check the cost of the
16444   // extract instead or remove that condition entirely.
16445   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
16446   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
16447   if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
16448       !ExtIdx)
16449     return SDValue();
16450 
16451   // The narrow load will be offset from the base address of the old load if
16452   // we are extracting from something besides index 0 (little-endian).
16453   EVT VT = Extract->getValueType(0);
16454   SDLoc DL(Extract);
16455   SDValue BaseAddr = Ld->getOperand(1);
16456   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
16457 
16458   // TODO: Use "BaseIndexOffset" to make this more effective.
16459   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
16460   MachineFunction &MF = DAG.getMachineFunction();
16461   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
16462                                                    VT.getStoreSize());
16463   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
16464   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
16465   return NewLd;
16466 }
16467 
16468 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
16469   EVT NVT = N->getValueType(0);
16470   SDValue V = N->getOperand(0);
16471 
16472   // Extract from UNDEF is UNDEF.
16473   if (V.isUndef())
16474     return DAG.getUNDEF(NVT);
16475 
16476   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
16477     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
16478       return NarrowLoad;
16479 
16480   // Combine:
16481   //    (extract_subvec (concat V1, V2, ...), i)
16482   // Into:
16483   //    Vi if possible
16484   // Only operand 0 is checked as 'concat' assumes all inputs of the same
16485   // type.
16486   if (V->getOpcode() == ISD::CONCAT_VECTORS &&
16487       isa<ConstantSDNode>(N->getOperand(1)) &&
16488       V->getOperand(0).getValueType() == NVT) {
16489     unsigned Idx = N->getConstantOperandVal(1);
16490     unsigned NumElems = NVT.getVectorNumElements();
16491     assert((Idx % NumElems) == 0 &&
16492            "IDX in concat is not a multiple of the result vector length.");
16493     return V->getOperand(Idx / NumElems);
16494   }
16495 
16496   // Skip bitcasting
16497   V = peekThroughBitcast(V);
16498 
16499   // If the input is a build vector. Try to make a smaller build vector.
16500   if (V->getOpcode() == ISD::BUILD_VECTOR) {
16501     if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
16502       EVT InVT = V->getValueType(0);
16503       unsigned ExtractSize = NVT.getSizeInBits();
16504       unsigned EltSize = InVT.getScalarSizeInBits();
16505       // Only do this if we won't split any elements.
16506       if (ExtractSize % EltSize == 0) {
16507         unsigned NumElems = ExtractSize / EltSize;
16508         EVT EltVT = InVT.getVectorElementType();
16509         EVT ExtractVT = NumElems == 1 ? EltVT :
16510           EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
16511         if ((Level < AfterLegalizeDAG ||
16512              (NumElems == 1 ||
16513               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
16514             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
16515           unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
16516                             EltSize;
16517           if (NumElems == 1) {
16518             SDValue Src = V->getOperand(IdxVal);
16519             if (EltVT != Src.getValueType())
16520               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
16521 
16522             return DAG.getBitcast(NVT, Src);
16523           }
16524 
16525           // Extract the pieces from the original build_vector.
16526           SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
16527                                             makeArrayRef(V->op_begin() + IdxVal,
16528                                                          NumElems));
16529           return DAG.getBitcast(NVT, BuildVec);
16530         }
16531       }
16532     }
16533   }
16534 
16535   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
16536     // Handle only simple case where vector being inserted and vector
16537     // being extracted are of same size.
16538     EVT SmallVT = V->getOperand(1).getValueType();
16539     if (!NVT.bitsEq(SmallVT))
16540       return SDValue();
16541 
16542     // Only handle cases where both indexes are constants.
16543     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
16544     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
16545 
16546     if (InsIdx && ExtIdx) {
16547       // Combine:
16548       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
16549       // Into:
16550       //    indices are equal or bit offsets are equal => V1
16551       //    otherwise => (extract_subvec V1, ExtIdx)
16552       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
16553           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
16554         return DAG.getBitcast(NVT, V->getOperand(1));
16555       return DAG.getNode(
16556           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
16557           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
16558           N->getOperand(1));
16559     }
16560   }
16561 
16562   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
16563     return NarrowBOp;
16564 
16565   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
16566     return SDValue(N, 0);
16567 
16568   return SDValue();
16569 }
16570 
16571 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
16572 // or turn a shuffle of a single concat into simpler shuffle then concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
                  SVN->getMask().end(), [](int i) { return i == -1; })) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                              makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;
    // Classify this chunk of the mask: all-undef, no-undef, or mixed.
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      // The chunk must start on a concat-operand boundary...
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      // ...and read consecutive elements, i.e. copy one whole concat operand.
      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      // Pick the copied operand from N0's or N1's concat inputs.
      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));

    } else if (AllUndef) {
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
16634 
16635 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
16636 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
16637 //
16638 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
16639 // a simplification in some sense, but it isn't appropriate in general: some
16640 // BUILD_VECTORs are substantially cheaper than others. The general case
16641 // of a BUILD_VECTOR requires inserting each element individually (or
16642 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
16643 // all constants is a single constant pool load.  A BUILD_VECTOR where each
16644 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
16645 // are undef lowers to a small number of element insertions.
16646 //
16647 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
16648 // We don't fold shuffles where one side is a non-zero constant, and we don't
16649 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
16650 // non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  // Bail out if either source has other users; folding would then keep the
  // original build vectors alive in addition to the new one.
  if (!N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();

  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
    bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // If both inputs are splats of the same value then we can safely merge this
  // to a single BUILD_VECTOOR with undef elements based on the shuffle mask.
  bool IsSplat = false;
  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
  if (BV0 && BV1)
    if (SDValue Splat0 = BV0->getSplatValue())
      IsSplat = (Splat0 == BV1->getSplatValue());

  // Walk the shuffle mask, collecting the scalar operand selected by each
  // mask element (or undef for -1 mask entries).
  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      // Map the mask element to the operand (N0 for [0,NumElts),
      // N1 for [NumElts,2*NumElts)) and the element index within it.
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
        Op = S.getOperand(0);
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
    // generating a splat; semantically, this is fine, but it's likely to
    // generate low-quality code if the target can't reconstruct an appropriate
    // shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!IsSplat && !DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }

  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      // Prefer zero-extension when the target says it is free; otherwise
      // sign-extend (or truncate, which both helpers also handle).
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
16724 
16725 // Match shuffles that can be converted to any_vector_extend_in_reg.
16726 // This is often generated during legalization.
16727 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
16728 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
16729 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
16730                                             SelectionDAG &DAG,
16731                                             const TargetLowering &TLI,
16732                                             bool LegalOperations,
16733                                             bool LegalTypes) {
16734   EVT VT = SVN->getValueType(0);
16735   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
16736 
16737   // TODO Add support for big-endian when we have a test case.
16738   if (!VT.isInteger() || IsBigEndian)
16739     return SDValue();
16740 
16741   unsigned NumElts = VT.getVectorNumElements();
16742   unsigned EltSizeInBits = VT.getScalarSizeInBits();
16743   ArrayRef<int> Mask = SVN->getMask();
16744   SDValue N0 = SVN->getOperand(0);
16745 
16746   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
16747   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
16748     for (unsigned i = 0; i != NumElts; ++i) {
16749       if (Mask[i] < 0)
16750         continue;
16751       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
16752         continue;
16753       return false;
16754     }
16755     return true;
16756   };
16757 
16758   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
16759   // power-of-2 extensions as they are the most likely.
16760   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
16761     // Check for non power of 2 vector sizes
16762     if (NumElts % Scale != 0)
16763       continue;
16764     if (!isAnyExtend(Scale))
16765       continue;
16766 
16767     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
16768     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
16769     if (!LegalTypes || TLI.isTypeLegal(OutVT))
16770       if (!LegalOperations ||
16771           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
16772         return DAG.getBitcast(VT,
16773                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
16774   }
16775 
16776   return SDValue();
16777 }
16778 
16779 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
16780 // each source element of a large type into the lowest elements of a smaller
16781 // destination type. This is often generated during legalization.
16782 // If the source node itself was a '*_extend_vector_inreg' node then we should
16783 // then be able to remove it.
16784 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
16785                                         SelectionDAG &DAG) {
16786   EVT VT = SVN->getValueType(0);
16787   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
16788 
16789   // TODO Add support for big-endian when we have a test case.
16790   if (!VT.isInteger() || IsBigEndian)
16791     return SDValue();
16792 
16793   SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
16794 
16795   unsigned Opcode = N0.getOpcode();
16796   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
16797       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
16798       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
16799     return SDValue();
16800 
16801   SDValue N00 = N0.getOperand(0);
16802   ArrayRef<int> Mask = SVN->getMask();
16803   unsigned NumElts = VT.getVectorNumElements();
16804   unsigned EltSizeInBits = VT.getScalarSizeInBits();
16805   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
16806   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
16807 
16808   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
16809     return SDValue();
16810   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
16811 
16812   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
16813   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
16814   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
16815   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
16816     for (unsigned i = 0; i != NumElts; ++i) {
16817       if (Mask[i] < 0)
16818         continue;
16819       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
16820         continue;
16821       return false;
16822     }
16823     return true;
16824   };
16825 
16826   // At the moment we just handle the case where we've truncated back to the
16827   // same size as before the extension.
16828   // TODO: handle more extension/truncation cases as cases arise.
16829   if (EltSizeInBits != ExtSrcSizeInBits)
16830     return SDValue();
16831 
16832   // We can remove *extend_vector_inreg only if the truncation happens at
16833   // the same scale as the extension.
16834   if (isTruncate(ExtScale))
16835     return DAG.getBitcast(VT, N00);
16836 
16837   return SDValue();
16838 }
16839 
16840 // Combine shuffles of splat-shuffles of the form:
16841 // shuffle (shuffle V, undef, splat-mask), undef, M
16842 // If splat-mask contains undef elements, we need to be careful about
16843 // introducing undef's in the folded mask which are not the result of composing
16844 // the masks of the shuffles.
16845 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
16846                                      ShuffleVectorSDNode *Splat,
16847                                      SelectionDAG &DAG) {
16848   ArrayRef<int> SplatMask = Splat->getMask();
16849   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
16850 
16851   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
16852   // every undef mask element in the splat-shuffle has a corresponding undef
16853   // element in the user-shuffle's mask or if the composition of mask elements
16854   // would result in undef.
16855   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
16856   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
16857   //   In this case it is not legal to simplify to the splat-shuffle because we
16858   //   may be exposing the users of the shuffle an undef element at index 1
16859   //   which was not there before the combine.
16860   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
16861   //   In this case the composition of masks yields SplatMask, so it's ok to
16862   //   simplify to the splat-shuffle.
16863   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
16864   //   In this case the composed mask includes all undef elements of SplatMask
16865   //   and in addition sets element zero to undef. It is safe to simplify to
16866   //   the splat-shuffle.
16867   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
16868                                        ArrayRef<int> SplatMask) {
16869     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
16870       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
16871           SplatMask[UserMask[i]] != -1)
16872         return false;
16873     return true;
16874   };
16875   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
16876     return SDValue(Splat, 0);
16877 
16878   // Create a new shuffle with a mask that is composed of the two shuffles'
16879   // masks.
16880   SmallVector<int, 32> NewMask;
16881   for (int Idx : UserMask)
16882     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
16883 
16884   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
16885                               Splat->getOperand(0), Splat->getOperand(1),
16886                               NewMask);
16887 }
16888 
16889 /// If the shuffle mask is taking exactly one element from the first vector
16890 /// operand and passing through all other elements from the second vector
16891 /// operand, return the index of the mask element that is choosing an element
16892 /// from the first operand. Otherwise, return -1.
16893 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
16894   int MaskSize = Mask.size();
16895   int EltFromOp0 = -1;
16896   // TODO: This does not match if there are undef elements in the shuffle mask.
16897   // Should we ignore undefs in the shuffle mask instead? The trade-off is
16898   // removing an instruction (a shuffle), but losing the knowledge that some
16899   // vector lanes are not needed.
16900   for (int i = 0; i != MaskSize; ++i) {
16901     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
16902       // We're looking for a shuffle of exactly one element from operand 0.
16903       if (EltFromOp0 != -1)
16904         return -1;
16905       EltFromOp0 = i;
16906     } else if (Mask[i] != i + MaskSize) {
16907       // Nothing from operand 1 can change lanes.
16908       return -1;
16909     }
16910   }
16911   return EltFromOp0;
16912 }
16913 
16914 /// If a shuffle inserts exactly one element from a source vector operand into
16915 /// another vector operand and we can access the specified element as a scalar,
16916 /// then we can eliminate the shuffle.
16917 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
16918                                       SelectionDAG &DAG) {
16919   // First, check if we are taking one element of a vector and shuffling that
16920   // element into another vector.
16921   ArrayRef<int> Mask = Shuf->getMask();
16922   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
16923   SDValue Op0 = Shuf->getOperand(0);
16924   SDValue Op1 = Shuf->getOperand(1);
16925   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
16926   if (ShufOp0Index == -1) {
16927     // Commute mask and check again.
16928     ShuffleVectorSDNode::commuteMask(CommutedMask);
16929     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
16930     if (ShufOp0Index == -1)
16931       return SDValue();
16932     // Commute operands to match the commuted shuffle mask.
16933     std::swap(Op0, Op1);
16934     Mask = CommutedMask;
16935   }
16936 
16937   // The shuffle inserts exactly one element from operand 0 into operand 1.
16938   // Now see if we can access that element as a scalar via a real insert element
16939   // instruction.
16940   // TODO: We can try harder to locate the element as a scalar. Examples: it
16941   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
16942   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
16943          "Shuffle mask value must be from operand 0");
16944   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
16945     return SDValue();
16946 
16947   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
16948   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
16949     return SDValue();
16950 
16951   // There's an existing insertelement with constant insertion index, so we
16952   // don't need to check the legality/profitability of a replacement operation
16953   // that differs at most in the constant value. The target should be able to
16954   // lower any of those in a similar way. If not, legalization will expand this
16955   // to a scalar-to-vector plus shuffle.
16956   //
16957   // Note that the shuffle may move the scalar from the position that the insert
16958   // element used. Therefore, our new insert element occurs at the shuffle's
16959   // mask index value, not the insert's index value.
16960   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
16961   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
16962                                         Op0.getOperand(2).getValueType());
16963   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
16964                      Op1, Op0.getOperand(1), NewInsIndex);
16965 }
16966 
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  // Remap any mask element that referenced the second (identical) operand so
  // it references the first.
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef: any mask element that selected
  // from the undef operand becomes an explicit undef (-1) mask entry.
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
    return InsElt;

  // A shuffle of a single vector that is a splat can always be folded.
  if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
    if (N1->isUndef() && N0Shuf->isSplat())
      return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      // Find the first non-undef operand of the BUILD_VECTOR.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // Simplify source operands based on shuffle mask.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  // Try to turn a shuffle of concatenated vectors into a concat of
  // per-subvector shuffles/copies (requires matching subvector types when
  // both operands are concats).
  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    // Peek through the bitcast only if there is one user.
    SDValue BC0 = N0;
    while (BC0.getOpcode() == ISD::BITCAST) {
      if (!BC0.hasOneUse())
        break;
      BC0 = BC0.getOperand(0);
    }

    // Widen a shuffle mask by 'Scale': each mask element becomes Scale
    // consecutive elements in the scaled mask (undef stays undef).
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      // Both scalar sizes must divide evenly into the common scalar size,
      // and the common type must be legal.
      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    // SV0 and SV1 are lazily discovered: the first (at most two) distinct
    // source vectors referenced by the combined mask become the operands of
    // the folded shuffle.
    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
  }

  return SDValue();
}
17306 
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      // Build a shuffle mask that moves the extracted element to lane 0 and
      // leaves all other lanes undef.
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;
      SDValue Val;
      // If we have an implicit truncate (the extracted scalar is wider than
      // the result's element type), do the truncate here as long as the
      // result scalar type is legal; otherwise fall through to the shuffle
      // path below.
      if (VT.getScalarType() != InVal.getValueType() &&
          InVal.getValueType().isScalarInteger() &&
          isTypeLegal(VT.getScalarType())) {
        Val =
            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
      }
      // Otherwise try the shuffle: element types must match, the source must
      // be at least as wide as the result, and the mask must be legal.
      if (VT.getScalarType() == InVecT.getScalarType() &&
          VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
          TLI.isShuffleMaskLegal(NewMask, VT)) {
        Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
                                   DAG.getUNDEF(InVecT), NewMask);
        // If the initial vector is the correct size this shuffle is a
        // valid result.
        if (VT == InVecT)
          return Val;
        // If not we must truncate the vector by extracting the leading
        // subvector of the result width.
        if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
          MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
          SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
          EVT SubVT =
              EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
                               VT.getVectorNumElements());
          Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
                            ZeroIdx);
          return Val;
        }
      }
    }
  }

  return SDValue();
}
17357 
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);  // Destination vector.
  SDValue N1 = N->getOperand(1);  // Subvector being inserted.
  SDValue N2 = N->getOperand(2);  // Insertion index.

  // If inserting an UNDEF, just return the original vector.
  if (N1.isUndef())
    return N0;

  // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
  // us to pull BITCASTs from input to output.
  if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
    if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);

  // If this is an insert of an extracted vector into an undef vector, we can
  // just use the input to the extract.
  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
    return N1.getOperand(0);

  // If we are inserting a bitcast value into an undef, with the same
  // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
  // The element-count and total-size equality checks ensure the bitcast can
  // be hoisted without changing the result's layout.
  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(0).getOperand(1) == N2 &&
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
          VT.getVectorNumElements() &&
      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
          VT.getSizeInBits()) {
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
  }

  // If both N0 and N1 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
    SDValue CN0 = N0.getOperand(0);
    SDValue CN1 = N1.getOperand(0);
    EVT CN0VT = CN0.getValueType();
    EVT CN1VT = CN1.getValueType();
    // The pre-bitcast types must share an element type and the destination
    // must keep its element count for the index N2 to stay meaningful.
    if (CN0VT.isVector() && CN1VT.isVector() &&
        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
        CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
                                      CN0.getValueType(), CN0, CN1, N2);
      return DAG.getBitcast(VT, NewINSERT);
    }
  }

  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
  // The inner insert is fully overwritten, so it can be dropped.
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N0.getOperand(1).getValueType() == N1.getValueType() &&
      N0.getOperand(2) == N2)
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
                       N1, N2);

  // The remaining folds need a constant insertion index.
  if (!isa<ConstantSDNode>(N2))
    return SDValue();

  unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();

  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, Idx0), Idx1)
  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
  // Sorting inserts by index (smaller index innermost) gives CSE a canonical
  // form; the swap is only valid because both subvectors have the same type
  // and distinct indices (equal indices were folded above).
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
      N1.getValueType() == N0.getOperand(1).getValueType() &&
      isa<ConstantSDNode>(N0.getOperand(2))) {
    unsigned OtherIdx = N0.getConstantOperandVal(2);
    if (InsIdx < OtherIdx) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
    }
  }

  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType()) {
    unsigned Factor = N1.getValueType().getVectorNumElements();

    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
    Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;

    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  return SDValue();
}
17458 
17459 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
17460   SDValue N0 = N->getOperand(0);
17461 
17462   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
17463   if (N0->getOpcode() == ISD::FP16_TO_FP)
17464     return N0->getOperand(0);
17465 
17466   return SDValue();
17467 }
17468 
17469 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
17470   SDValue N0 = N->getOperand(0);
17471 
17472   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
17473   if (N0->getOpcode() == ISD::AND) {
17474     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
17475     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
17476       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
17477                          N0.getOperand(0));
17478     }
17479   }
17480 
17481   return SDValue();
17482 }
17483 
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = peekThroughBitcast(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  // The mask operand must be a build_vector of constants to be analyzable.
  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  // 'Split' is how many sub-elements each build_vector element is divided
  // into; Split == 1 analyzes the elements at their natural width.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      // Undef mask elements become undef shuffle lanes.
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // On big-endian targets sub-element 0 occupies the high bits of the
      // scalar, so the shift amount is mirrored.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits.lshrInPlace(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      // All-ones keeps the LHS lane; all-zeros selects from the zero vector
      // (indices >= NumSubElts refer to the second shuffle operand).
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();  // Mixed bits - can't be expressed as a shuffle.
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try progressively finer splits until one yields a legal clear mask.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
17574 
17575 /// Visit a binary vector operation, like ADD.
17576 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
17577   assert(N->getValueType(0).isVector() &&
17578          "SimplifyVBinOp only works on vectors!");
17579 
17580   SDValue LHS = N->getOperand(0);
17581   SDValue RHS = N->getOperand(1);
17582   SDValue Ops[] = {LHS, RHS};
17583 
17584   // See if we can constant fold the vector operation.
17585   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
17586           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
17587     return Fold;
17588 
17589   // Type legalization might introduce new shuffles in the DAG.
17590   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
17591   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
17592   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
17593       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
17594       LHS.getOperand(1).isUndef() &&
17595       RHS.getOperand(1).isUndef()) {
17596     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
17597     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
17598 
17599     if (SVN0->getMask().equals(SVN1->getMask())) {
17600       EVT VT = N->getValueType(0);
17601       SDValue UndefVector = LHS.getOperand(1);
17602       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
17603                                      LHS.getOperand(0), RHS.getOperand(0),
17604                                      N->getFlags());
17605       AddUsersToWorklist(N);
17606       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
17607                                   SVN0->getMask());
17608     }
17609   }
17610 
17611   return SDValue();
17612 }
17613 
17614 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
17615                                     SDValue N2) {
17616   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
17617 
17618   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
17619                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
17620 
17621   // If we got a simplified select_cc node back from SimplifySelectCC, then
17622   // break it down into a new SETCC node, and a new SELECT node, and then return
17623   // the SELECT node, since we were called with a SELECT node.
17624   if (SCC.getNode()) {
17625     // Check to see if we got a select_cc back (to turn into setcc/select).
17626     // Otherwise, just return whatever node we got back, like fabs.
17627     if (SCC.getOpcode() == ISD::SELECT_CC) {
17628       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
17629                                   N0.getValueType(),
17630                                   SCC.getOperand(0), SCC.getOperand(1),
17631                                   SCC.getOperand(4));
17632       AddToWorklist(SETCC.getNode());
17633       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
17634                            SCC.getOperand(2), SCC.getOperand(3));
17635     }
17636 
17637     return SCC;
17638   }
17639   return SDValue();
17640 }
17641 
/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select.  Callers of this
/// should assume that TheSelect is deleted if this returns true.  As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {
  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
      SDValue Sqrt = RHS;
      ISD::CondCode CC;
      SDValue CmpLHS;
      const ConstantFPSDNode *Zero = nullptr;

      // Pull the comparison out of either a SELECT_CC or a SELECT/VSELECT
      // whose condition is a SETCC. If neither form matches, Zero stays null
      // and the short-circuit below leaves CC/CmpLHS unread.
      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
        CmpLHS = TheSelect->getOperand(0);
        Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
      } else {
        // SELECT or VSELECT
        SDValue Cmp = TheSelect->getOperand(0);
        if (Cmp.getOpcode() == ISD::SETCC) {
          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
          CmpLHS = Cmp.getOperand(0);
          Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
        }
      }
      // The compare must be a less-than-zero test of the fsqrt operand.
      // (Sign of the FP zero constant does not matter for the comparison.)
      if (Zero && Zero->isZero() &&
          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
          CC == ISD::SETULT || CC == ISD::SETLT)) {
        // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
        CombineTo(TheSelect, Sqrt);
        return true;
      }
    }
  }
  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        LLD->isVolatile() || RLD->isVolatile() ||
        // FIXME: If either is a pre/post inc/dec load,
        // we'd need to split out the address adjustment.
        LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        // The select over the addresses must itself be lowerable.
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // Check that the select condition doesn't reach either load.  If so,
    // folding this will induce a cycle into the DAG.  If not, this is safe to
    // xform, so create a select of the addresses.
    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      // Only a load whose chain result is used (value 1) can feed the
      // condition and create a cycle.
      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
        return false;
      // The loads must not depend on one another.
      if (LLD->isPredecessorOf(RLD) ||
          RLD->isPredecessorOf(LLD))
        return false;
      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      // SELECT_CC compares two operands; neither may be reachable from a
      // load whose chain is live, or the fold would create a cycle.
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();

      if ((LLD->hasAnyUseOfValue(1) &&
           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
          (RLD->hasAnyUseOfValue(1) &&
           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must be the minimum (most restrictive) alignment of the
    // inputs.
    unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
    // The merged load may only keep flags that hold for BOTH inputs.
    MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
    if (!RLD->isInvariant())
      MMOFlags &= ~MachineMemOperand::MOInvariant;
    if (!RLD->isDereferenceable())
      MMOFlags &= ~MachineMemOperand::MODereferenceable;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
                         LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
                         MMOFlags);
    } else {
      // FIXME: Discards pointer and AA info.
      // If LLD is the anyext load, take the (stricter) extension kind from
      // RLD; otherwise LLD's kind covers both.
      Load = DAG.getExtLoad(
          LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
                                                  : LLD->getExtensionType(),
          SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
          MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain.  We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}
17794 
17795 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
17796 /// bitwise 'and'.
17797 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
17798                                             SDValue N1, SDValue N2, SDValue N3,
17799                                             ISD::CondCode CC) {
17800   // If this is a select where the false operand is zero and the compare is a
17801   // check of the sign bit, see if we can perform the "gzip trick":
17802   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
17803   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
17804   EVT XType = N0.getValueType();
17805   EVT AType = N2.getValueType();
17806   if (!isNullConstant(N3) || !XType.bitsGE(AType))
17807     return SDValue();
17808 
17809   // If the comparison is testing for a positive value, we have to invert
17810   // the sign bit mask, so only do that transform if the target has a bitwise
17811   // 'and not' instruction (the invert is free).
17812   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
17813     // (X > -1) ? A : 0
17814     // (X >  0) ? X : 0 <-- This is canonical signed max.
17815     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
17816       return SDValue();
17817   } else if (CC == ISD::SETLT) {
17818     // (X <  0) ? A : 0
17819     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
17820     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
17821       return SDValue();
17822   } else {
17823     return SDValue();
17824   }
17825 
17826   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
17827   // constant.
17828   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
17829   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
17830   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
17831     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
17832     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
17833     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
17834     AddToWorklist(Shift.getNode());
17835 
17836     if (XType.bitsGT(AType)) {
17837       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
17838       AddToWorklist(Shift.getNode());
17839     }
17840 
17841     if (CC == ISD::SETGT)
17842       Shift = DAG.getNOT(DL, Shift, AType);
17843 
17844     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
17845   }
17846 
17847   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
17848   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
17849   AddToWorklist(Shift.getNode());
17850 
17851   if (XType.bitsGT(AType)) {
17852     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
17853     AddToWorklist(Shift.getNode());
17854   }
17855 
17856   if (CC == ISD::SETGT)
17857     Shift = DAG.getNOT(DL, Shift, AType);
17858 
17859   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
17860 }
17861 
17862 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
17863 /// where 'cond' is the comparison specified by CC.
17864 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
17865                                       SDValue N2, SDValue N3, ISD::CondCode CC,
17866                                       bool NotExtCompare) {
17867   // (x ? y : y) -> y.
17868   if (N2 == N3) return N2;
17869 
17870   EVT VT = N2.getValueType();
17871   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
17872   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
17873 
17874   // Determine if the condition we're dealing with is constant
17875   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
17876                               N0, N1, CC, DL, false);
17877   if (SCC.getNode()) AddToWorklist(SCC.getNode());
17878 
17879   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
17880     // fold select_cc true, x, y -> x
17881     // fold select_cc false, x, y -> y
17882     return !SCCC->isNullValue() ? N2 : N3;
17883   }
17884 
17885   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
17886   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
17887   // in it.  This is a win when the constant is not otherwise available because
17888   // it replaces two constant pool loads with one.  We only do this if the FP
17889   // type is known to be legal, because if it isn't, then we are before legalize
17890   // types an we want the other legalization to happen first (e.g. to avoid
17891   // messing with soft float) and if the ConstantFP is not legal, because if
17892   // it is legal, we may not need to store the FP constant in a constant pool.
17893   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
17894     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
17895       if (TLI.isTypeLegal(N2.getValueType()) &&
17896           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
17897                TargetLowering::Legal &&
17898            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
17899            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
17900           // If both constants have multiple uses, then we won't need to do an
17901           // extra load, they are likely around in registers for other users.
17902           (TV->hasOneUse() || FV->hasOneUse())) {
17903         Constant *Elts[] = {
17904           const_cast<ConstantFP*>(FV->getConstantFPValue()),
17905           const_cast<ConstantFP*>(TV->getConstantFPValue())
17906         };
17907         Type *FPTy = Elts[0]->getType();
17908         const DataLayout &TD = DAG.getDataLayout();
17909 
17910         // Create a ConstantArray of the two constants.
17911         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
17912         SDValue CPIdx =
17913             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
17914                                 TD.getPrefTypeAlignment(FPTy));
17915         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
17916 
17917         // Get the offsets to the 0 and 1 element of the array so that we can
17918         // select between them.
17919         SDValue Zero = DAG.getIntPtrConstant(0, DL);
17920         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
17921         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
17922 
17923         SDValue Cond = DAG.getSetCC(DL,
17924                                     getSetCCResultType(N0.getValueType()),
17925                                     N0, N1, CC);
17926         AddToWorklist(Cond.getNode());
17927         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
17928                                           Cond, One, Zero);
17929         AddToWorklist(CstOffset.getNode());
17930         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
17931                             CstOffset);
17932         AddToWorklist(CPIdx.getNode());
17933         return DAG.getLoad(
17934             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
17935             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
17936             Alignment);
17937       }
17938     }
17939 
17940   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
17941     return V;
17942 
17943   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
17944   // where y is has a single bit set.
17945   // A plaintext description would be, we can turn the SELECT_CC into an AND
17946   // when the condition can be materialized as an all-ones register.  Any
17947   // single bit-test can be materialized as an all-ones register with
17948   // shift-left and shift-right-arith.
17949   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
17950       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
17951     SDValue AndLHS = N0->getOperand(0);
17952     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17953     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
17954       // Shift the tested bit over the sign bit.
17955       const APInt &AndMask = ConstAndRHS->getAPIntValue();
17956       SDValue ShlAmt =
17957         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
17958                         getShiftAmountTy(AndLHS.getValueType()));
17959       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
17960 
17961       // Now arithmetic right shift it all the way over, so the result is either
17962       // all-ones, or zero.
17963       SDValue ShrAmt =
17964         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
17965                         getShiftAmountTy(Shl.getValueType()));
17966       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
17967 
17968       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
17969     }
17970   }
17971 
17972   // fold select C, 16, 0 -> shl C, 4
17973   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
17974       TLI.getBooleanContents(N0.getValueType()) ==
17975           TargetLowering::ZeroOrOneBooleanContent) {
17976 
17977     // If the caller doesn't want us to simplify this into a zext of a compare,
17978     // don't do it.
17979     if (NotExtCompare && N2C->isOne())
17980       return SDValue();
17981 
17982     // Get a SetCC of the condition
17983     // NOTE: Don't create a SETCC if it's not legal on this target.
17984     if (!LegalOperations ||
17985         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
17986       SDValue Temp, SCC;
17987       // cast from setcc result type to select result type
17988       if (LegalTypes) {
17989         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
17990                             N0, N1, CC);
17991         if (N2.getValueType().bitsLT(SCC.getValueType()))
17992           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
17993                                         N2.getValueType());
17994         else
17995           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
17996                              N2.getValueType(), SCC);
17997       } else {
17998         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
17999         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
18000                            N2.getValueType(), SCC);
18001       }
18002 
18003       AddToWorklist(SCC.getNode());
18004       AddToWorklist(Temp.getNode());
18005 
18006       if (N2C->isOne())
18007         return Temp;
18008 
18009       // shl setcc result by log2 n2c
18010       return DAG.getNode(
18011           ISD::SHL, DL, N2.getValueType(), Temp,
18012           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
18013                           getShiftAmountTy(Temp.getValueType())));
18014     }
18015   }
18016 
18017   // Check to see if this is an integer abs.
18018   // select_cc setg[te] X,  0,  X, -X ->
18019   // select_cc setgt    X, -1,  X, -X ->
18020   // select_cc setl[te] X,  0, -X,  X ->
18021   // select_cc setlt    X,  1, -X,  X ->
18022   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
18023   if (N1C) {
18024     ConstantSDNode *SubC = nullptr;
18025     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
18026          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
18027         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
18028       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
18029     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
18030               (N1C->isOne() && CC == ISD::SETLT)) &&
18031              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
18032       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
18033 
18034     EVT XType = N0.getValueType();
18035     if (SubC && SubC->isNullValue() && XType.isInteger()) {
18036       SDLoc DL(N0);
18037       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
18038                                   N0,
18039                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
18040                                          getShiftAmountTy(N0.getValueType())));
18041       SDValue Add = DAG.getNode(ISD::ADD, DL,
18042                                 XType, N0, Shift);
18043       AddToWorklist(Shift.getNode());
18044       AddToWorklist(Add.getNode());
18045       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
18046     }
18047   }
18048 
18049   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
18050   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
18051   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
18052   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
18053   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
18054   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
18055   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
18056   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
18057   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
18058     SDValue ValueOnZero = N2;
18059     SDValue Count = N3;
18060     // If the condition is NE instead of E, swap the operands.
18061     if (CC == ISD::SETNE)
18062       std::swap(ValueOnZero, Count);
18063     // Check if the value on zero is a constant equal to the bits in the type.
18064     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
18065       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
18066         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
18067         // legal, combine to just cttz.
18068         if ((Count.getOpcode() == ISD::CTTZ ||
18069              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
18070             N0 == Count.getOperand(0) &&
18071             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
18072           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
18073         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
18074         // legal, combine to just ctlz.
18075         if ((Count.getOpcode() == ISD::CTLZ ||
18076              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
18077             N0 == Count.getOperand(0) &&
18078             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
18079           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
18080       }
18081     }
18082   }
18083 
18084   return SDValue();
18085 }
18086 
18087 /// This is a stub for TargetLowering::SimplifySetCC.
18088 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
18089                                    ISD::CondCode Cond, const SDLoc &DL,
18090                                    bool foldBooleans) {
18091   TargetLowering::DAGCombinerInfo
18092     DagCombineInfo(DAG, Level, false, this);
18093   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
18094 }
18095 
18096 /// Given an ISD::SDIV node expressing a divide by constant, return
18097 /// a DAG expression to select that will generate the same value by multiplying
18098 /// by a magic number.
18099 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18100 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
18101   // when optimising for minimum size, we don't want to expand a div to a mul
18102   // and a shift.
18103   if (DAG.getMachineFunction().getFunction().optForMinSize())
18104     return SDValue();
18105 
18106   SmallVector<SDNode *, 8> Built;
18107   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
18108     for (SDNode *N : Built)
18109       AddToWorklist(N);
18110     return S;
18111   }
18112 
18113   return SDValue();
18114 }
18115 
18116 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
18117 /// DAG expression that will generate the same value by right shifting.
18118 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
18119   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
18120   if (!C)
18121     return SDValue();
18122 
18123   // Avoid division by zero.
18124   if (C->isNullValue())
18125     return SDValue();
18126 
18127   SmallVector<SDNode *, 8> Built;
18128   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
18129     for (SDNode *N : Built)
18130       AddToWorklist(N);
18131     return S;
18132   }
18133 
18134   return SDValue();
18135 }
18136 
18137 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
18138 /// expression that will generate the same value by multiplying by a magic
18139 /// number.
18140 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18141 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
18142   // when optimising for minimum size, we don't want to expand a div to a mul
18143   // and a shift.
18144   if (DAG.getMachineFunction().getFunction().optForMinSize())
18145     return SDValue();
18146 
18147   SmallVector<SDNode *, 8> Built;
18148   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
18149     for (SDNode *N : Built)
18150       AddToWorklist(N);
18151     return S;
18152   }
18153 
18154   return SDValue();
18155 }
18156 
18157 /// Determines the LogBase2 value for a non-null input value using the
18158 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
18159 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
18160   EVT VT = V.getValueType();
18161   unsigned EltBits = VT.getScalarSizeInBits();
18162   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
18163   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
18164   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
18165   return LogBase2;
18166 }
18167 
18168 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18169 /// For the reciprocal, we need to find the zero of the function:
18170 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
18171 ///     =>
18172 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
18173 ///     does not require additional intermediate precision]
18174 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
18175   if (Level >= AfterLegalizeDAG)
18176     return SDValue();
18177 
18178   // TODO: Handle half and/or extended types?
18179   EVT VT = Op.getValueType();
18180   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18181     return SDValue();
18182 
18183   // If estimates are explicitly disabled for this function, we're done.
18184   MachineFunction &MF = DAG.getMachineFunction();
18185   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
18186   if (Enabled == TLI.ReciprocalEstimate::Disabled)
18187     return SDValue();
18188 
18189   // Estimates may be explicitly enabled for this type with a custom number of
18190   // refinement steps.
18191   int Iterations = TLI.getDivRefinementSteps(VT, MF);
18192   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
18193     AddToWorklist(Est.getNode());
18194 
18195     if (Iterations) {
18196       EVT VT = Op.getValueType();
18197       SDLoc DL(Op);
18198       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18199 
18200       // Newton iterations: Est = Est + Est (1 - Arg * Est)
18201       for (int i = 0; i < Iterations; ++i) {
18202         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
18203         AddToWorklist(NewEst.getNode());
18204 
18205         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
18206         AddToWorklist(NewEst.getNode());
18207 
18208         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18209         AddToWorklist(NewEst.getNode());
18210 
18211         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
18212         AddToWorklist(Est.getNode());
18213       }
18214     }
18215     return Est;
18216   }
18217 
18218   return SDValue();
18219 }
18220 
18221 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18222 /// For the reciprocal sqrt, we need to find the zero of the function:
18223 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18224 ///     =>
18225 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
18226 /// As a result, we precompute A/2 prior to the iteration loop.
18227 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
18228                                          unsigned Iterations,
18229                                          SDNodeFlags Flags, bool Reciprocal) {
18230   EVT VT = Arg.getValueType();
18231   SDLoc DL(Arg);
18232   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
18233 
18234   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
18235   // this entire sequence requires only one FP constant.
18236   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
18237   AddToWorklist(HalfArg.getNode());
18238 
18239   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
18240   AddToWorklist(HalfArg.getNode());
18241 
18242   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
18243   for (unsigned i = 0; i < Iterations; ++i) {
18244     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
18245     AddToWorklist(NewEst.getNode());
18246 
18247     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
18248     AddToWorklist(NewEst.getNode());
18249 
18250     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
18251     AddToWorklist(NewEst.getNode());
18252 
18253     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18254     AddToWorklist(Est.getNode());
18255   }
18256 
18257   // If non-reciprocal square root is requested, multiply the result by Arg.
18258   if (!Reciprocal) {
18259     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
18260     AddToWorklist(Est.getNode());
18261   }
18262 
18263   return Est;
18264 }
18265 
18266 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18267 /// For the reciprocal sqrt, we need to find the zero of the function:
18268 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18269 ///     =>
18270 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
18271 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
18272                                          unsigned Iterations,
18273                                          SDNodeFlags Flags, bool Reciprocal) {
18274   EVT VT = Arg.getValueType();
18275   SDLoc DL(Arg);
18276   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
18277   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
18278 
18279   // This routine must enter the loop below to work correctly
18280   // when (Reciprocal == false).
18281   assert(Iterations > 0);
18282 
18283   // Newton iterations for reciprocal square root:
18284   // E = (E * -0.5) * ((A * E) * E + -3.0)
18285   for (unsigned i = 0; i < Iterations; ++i) {
18286     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
18287     AddToWorklist(AE.getNode());
18288 
18289     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
18290     AddToWorklist(AEE.getNode());
18291 
18292     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
18293     AddToWorklist(RHS.getNode());
18294 
18295     // When calculating a square root at the last iteration build:
18296     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
18297     // (notice a common subexpression)
18298     SDValue LHS;
18299     if (Reciprocal || (i + 1) < Iterations) {
18300       // RSQRT: LHS = (E * -0.5)
18301       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
18302     } else {
18303       // SQRT: LHS = (A * E) * -0.5
18304       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
18305     }
18306     AddToWorklist(LHS.getNode());
18307 
18308     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
18309     AddToWorklist(Est.getNode());
18310   }
18311 
18312   return Est;
18313 }
18314 
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  // Only form estimate sequences before the final DAG legalization phase.
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  // The target reports (via UseOneConstNR) whether its estimate should be
  // refined with the one-constant or the two-constant Newton-Raphson
  // sequence.
  bool UseOneConstNR = false;
  if (SDValue Est =
      TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                          Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
            ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
            : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // The estimate is now completely wrong if the input was exactly 0.0 or
        // possibly a denormal. Force the answer to 0.0 for those cases.
        EVT VT = Op.getValueType();
        SDLoc DL(Op);
        EVT CCVT = getSetCCResultType(VT);
        // Vector selects need VSELECT; scalars use SELECT.
        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
        const Function &F = DAG.getMachineFunction().getFunction();
        Attribute Denorms = F.getFnAttribute("denormal-fp-math");
        if (Denorms.getValueAsString().equals("ieee")) {
          // With IEEE denormal handling, denormal inputs must also produce
          // 0.0, so compare against the smallest normalized value:
          // fabs(X) < SmallestNormal ? 0.0 : Est
          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
          AddToWorklist(Fabs.getNode());
          AddToWorklist(IsDenorm.getNode());
          AddToWorklist(Est.getNode());
        } else {
          // Otherwise only exact zero needs fixing up:
          // X == 0.0 ? 0.0 : Est
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
          AddToWorklist(IsZero.getNode());
          AddToWorklist(Est.getNode());
        }
      }
    }
    return Est;
  }

  return SDValue();
}
18385 
/// Build an estimate of 1/sqrt(Op); see buildSqrtEstimateImpl.
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, /*Reciprocal=*/true);
}
18389 
/// Build an estimate of sqrt(Op); see buildSqrtEstimateImpl for the
/// zero/denormal postprocessing this entails.
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, /*Reciprocal=*/false);
}
18393 
/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
  // If they are the same then they must be aliases.
  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;

  // If they are both volatile then they cannot be reordered.
  if (Op0->isVolatile() && Op1->isVolatile()) return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load /store.
  if (Op0->isInvariant() && Op1->writeMem())
    return false;

  if (Op1->isInvariant() && Op0->writeMem())
    return false;

  // Sizes of the two accesses, in bytes.
  unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
  unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();

  // Check for BaseIndexOffset matching.
  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
  int64_t PtrDiff;
  if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
    // Same base and index with a known constant byte difference: the
    // accesses cover [0, NumBytes0) and [PtrDiff, PtrDiff + NumBytes1);
    // they alias exactly when those intervals overlap.
    if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
      return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));

    // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
    // able to calculate their relative offset if at least one arises
    // from an alloca. However, these allocas cannot overlap and we
    // can infer there is no alias.
    if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
      if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
        MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
        // If the base are the same frame index but the we couldn't find a
        // constant offset, (indices are different) be conservative.
        if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
                       !MFI.isFixedObjectIndex(B->getIndex())))
          return false;
      }

    // Classify each base as frame index, global address, or constant pool.
    bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
    bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
    bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
    bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
    bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
    bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());

    // If of mismatched base types or checkable indices we can check
    // they do not alias.
    if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
         (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
        (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
      return false;
  }

  // If we know required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias. This check is conservative for now to catch
  // cases created by splitting vector types.
  int64_t SrcValOffset0 = Op0->getSrcValueOffset();
  int64_t SrcValOffset1 = Op1->getSrcValueOffset();
  unsigned OrigAlignment0 = Op0->getOriginalAlignment();
  unsigned OrigAlignment1 = Op1->getOriginalAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
    // Positions of each access within its alignment block.
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of
    // similar size. Return no alias.
    if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
        (OffAlign1 + NumBytes1) <= OffAlign0)
      return false;
  }

  // Fall back to the IR-level alias analysis if enabled (by flag or by the
  // subtarget's default).
  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA &&
      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
    // Use alias analysis information.
    // Extend each access size so both locations are measured from the
    // smaller source-value offset, keeping the query conservative.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
    AliasResult AAResult =
        AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
18498 
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
/// \p N is the memory node being combined, \p OriginalChain its incoming
/// chain, and \p Aliases receives every chain node that may alias \p N.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    // NOTE: Depth is a running total of chain steps taken across all
    // worklist paths (it is only ever incremented), not a per-path depth.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      // Budget exhausted: conservatively report the original chain as the
      // sole alias.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here.
      // Two non-volatile loads never constrain each other's ordering.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        // Too wide to expand; treat the whole token factor as an alias.
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    case ISD::CopyFromReg:
      // Forward past CopyFromReg.
      Chains.push_back(Chain.getOperand(0));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}
18585 
18586 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
18587 /// (aliasing node.)
18588 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
18589   if (OptLevel == CodeGenOpt::None)
18590     return OldChain;
18591 
18592   // Ops for replacing token factor.
18593   SmallVector<SDValue, 8> Aliases;
18594 
18595   // Accumulate all the aliases to this node.
18596   GatherAllAliases(N, OldChain, Aliases);
18597 
18598   // If no operands then chain to entry token.
18599   if (Aliases.size() == 0)
18600     return DAG.getEntryNode();
18601 
18602   // If a single operand then chain to it.  We don't need to revisit it.
18603   if (Aliases.size() == 1)
18604     return Aliases[0];
18605 
18606   // Construct a custom tailored token factor.
18607   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
18608 }
18609 
18610 // This function tries to collect a bunch of potentially interesting
18611 // nodes to improve the chains of, all at once. This might seem
18612 // redundant, as this function gets called when visiting every store
18613 // node, so why not let the work be done on each store as it's visited?
18614 //
18615 // I believe this is mainly important because MergeConsecutiveStores
18616 // is unable to deal with merging stores of different sizes, so unless
18617 // we improve the chains of all the potential candidates up-front
18618 // before running MergeConsecutiveStores, it might only see some of
18619 // the nodes that will eventually be candidates, and then not be able
18620 // to go from a partially-merged state to the desired final
18621 // fully-merged state.
/// Improve the chains of \p St and of every store reachable above it with the
/// same base pointer.  Returns true only if \p St itself got a better chain.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  SmallVector<StoreSDNode *, 8> ChainedStores;
  ChainedStores.push_back(St);

  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction with a different kind
  // or instruction which has a different base pointer.
  StoreSDNode *Index = St;
  while (Index) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (Index != St && !SDValue(Index, 0)->hasOneUse())
      break;

    // Volatile or indexed stores must keep their place in the chain.
    if (Index->isVolatile() || Index->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);

    // Check that the base pointer is the same as the original one.
    if (!BasePtr.equalBaseIndex(Ptr, DAG))
      break;

    // Walk up the chain to find the next store node, ignoring any
    // intermediate loads. Any other kind of node will halt the loop.
    SDNode *NextInChain = Index->getChain().getNode();
    while (true) {
      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
        // We found a store node. Use it for the next iteration.
        if (STn->isVolatile() || STn->isIndexed()) {
          // A volatile/indexed store ends the walk entirely.
          Index = nullptr;
          break;
        }
        ChainedStores.push_back(STn);
        Index = STn;
        break;
      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
        // Skip over loads and keep searching upward.
        NextInChain = Ldn->getChain().getNode();
        continue;
      } else {
        // Any other node kind terminates the upward walk.
        Index = nullptr;
        break;
      }
    }// end while
  }

  // At this point, ChainedStores lists all of the Store nodes
  // reachable by iterating up through chain nodes matching the above
  // conditions.  For each such store identified, try to find an
  // earlier chain to attach the store to which won't violate the
  // required ordering.
  bool MadeChangeToSt = false;
  SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;

  for (StoreSDNode *ChainedStore : ChainedStores) {
    SDValue Chain = ChainedStore->getChain();
    SDValue BetterChain = FindBetterChain(ChainedStore, Chain);

    if (Chain != BetterChain) {
      // Only report a change to the caller if St itself was improved.
      if (ChainedStore == St)
        MadeChangeToSt = true;
      BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
    }
  }

  // Do all replacements after finding the replacements to make to avoid making
  // the chains more complicated by introducing new TokenFactors.
  for (auto Replacement : BetterChains)
    replaceStoreChain(Replacement.first, Replacement.second);

  return MadeChangeToSt;
}
18709 
/// This is the entry point for the file: construct a DAGCombiner over this
/// SelectionDAG and run it at the given combine level.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
                           CodeGenOpt::Level OptLevel) {
  // Build a fresh combiner and process the whole DAG.
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}
18716