1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/SelectionDAG.h"
28 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
29 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/KnownBits.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Target/TargetLowering.h"
41 #include "llvm/Target/TargetOptions.h"
42 #include "llvm/Target/TargetRegisterInfo.h"
43 #include "llvm/Target/TargetSubtargetInfo.h"
44 #include <algorithm>
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "dagcombine"
48 
49 STATISTIC(NodesCombined   , "Number of dag nodes combined");
50 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
51 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
52 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
53 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
54 STATISTIC(SlicedLoads, "Number of load sliced");
55 
56 namespace {
  /// Hidden boolean flag "-combiner-global-alias-analysis". Per its
  /// description it enables the DAG combiner's use of IR alias analysis.
  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner's use of IR alias analysis"));

  /// Hidden boolean flag "-combiner-use-tbaa", initialised to true. Per its
  /// description it enables the DAG combiner's use of TBAA.
  static cl::opt<bool>
    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
               cl::desc("Enable DAG combiner's use of TBAA"));

  // The following option is only compiled in when NDEBUG is not defined
  // (i.e. in builds with assertions enabled).
#ifndef NDEBUG
  /// Hidden string flag "-combiner-aa-only-func". Per its description it
  /// restricts DAG-combiner alias analysis to the named function.
  static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
               cl::desc("Only use DAG-combiner alias analysis in this"
                        " function"));
#endif

  /// Hidden option to stress test load slicing, i.e., when this option
  /// is enabled, load slicing bypasses most of its profitability guards.
  /// Initialised to false.
  static cl::opt<bool>
  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                    cl::desc("Bypass the profitability model of load "
                             "slicing"),
                    cl::init(false));

  /// Hidden boolean flag "-combiner-split-load-index", initialised to true.
  /// Per its description it lets the DAG combiner split indexing from loads.
  static cl::opt<bool>
    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                      cl::desc("DAG combiner may split indexing from loads"));
83 
84 //------------------------------ DAGCombiner ---------------------------------//
85 
86   class DAGCombiner {
87     SelectionDAG &DAG;
88     const TargetLowering &TLI;
89     CombineLevel Level;
90     CodeGenOpt::Level OptLevel;
91     bool LegalOperations;
92     bool LegalTypes;
93     bool ForCodeSize;
94 
95     /// \brief Worklist of all of the nodes that need to be simplified.
96     ///
97     /// This must behave as a stack -- new nodes to process are pushed onto the
98     /// back and when processing we pop off of the back.
99     ///
100     /// The worklist will not contain duplicates but may contain null entries
101     /// due to nodes being deleted from the underlying DAG.
102     SmallVector<SDNode *, 64> Worklist;
103 
104     /// \brief Mapping from an SDNode to its position on the worklist.
105     ///
106     /// This is used to find and remove nodes from the worklist (by nulling
107     /// them) when they are deleted from the underlying DAG. It relies on
108     /// stable indices of nodes within the worklist.
109     DenseMap<SDNode *, unsigned> WorklistMap;
110 
111     /// \brief Set of nodes which have been combined (at least once).
112     ///
113     /// This is used to allow us to reliably add any operands of a DAG node
114     /// which have not yet been combined to the worklist.
115     SmallPtrSet<SDNode *, 32> CombinedNodes;
116 
117     // AA - Used for DAG load/store alias analysis.
118     AliasAnalysis *AA;
119 
120     /// When an instruction is simplified, add all users of the instruction to
121     /// the work lists because they might get more simplified now.
122     void AddUsersToWorklist(SDNode *N) {
123       for (SDNode *Node : N->uses())
124         AddToWorklist(Node);
125     }
126 
127     /// Call the node-specific routine that folds each particular type of node.
128     SDValue visit(SDNode *N);
129 
130   public:
131     /// Add to the worklist making sure its instance is at the back (next to be
132     /// processed.)
133     void AddToWorklist(SDNode *N) {
134       assert(N->getOpcode() != ISD::DELETED_NODE &&
135              "Deleted Node added to Worklist");
136 
137       // Skip handle nodes as they can't usefully be combined and confuse the
138       // zero-use deletion strategy.
139       if (N->getOpcode() == ISD::HANDLENODE)
140         return;
141 
142       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
143         Worklist.push_back(N);
144     }
145 
146     /// Remove all instances of N from the worklist.
147     void removeFromWorklist(SDNode *N) {
148       CombinedNodes.erase(N);
149 
150       auto It = WorklistMap.find(N);
151       if (It == WorklistMap.end())
152         return; // Not in the worklist.
153 
154       // Null out the entry rather than erasing it to avoid a linear operation.
155       Worklist[It->second] = nullptr;
156       WorklistMap.erase(It);
157     }
158 
159     void deleteAndRecombine(SDNode *N);
160     bool recursivelyDeleteUnusedNodes(SDNode *N);
161 
162     /// Replaces all uses of the results of one DAG node with new values.
163     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
164                       bool AddTo = true);
165 
166     /// Replaces all uses of the results of one DAG node with new values.
167     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
168       return CombineTo(N, &Res, 1, AddTo);
169     }
170 
171     /// Replaces all uses of the results of one DAG node with new values.
172     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
173                       bool AddTo = true) {
174       SDValue To[] = { Res0, Res1 };
175       return CombineTo(N, To, 2, AddTo);
176     }
177 
178     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
179 
180   private:
181     unsigned MaximumLegalStoreInBits;
182 
183     /// Check the specified integer node value to see if it can be simplified or
184     /// if things it uses can be simplified by bit propagation.
185     /// If so, return true.
186     bool SimplifyDemandedBits(SDValue Op) {
187       unsigned BitWidth = Op.getScalarValueSizeInBits();
188       APInt Demanded = APInt::getAllOnesValue(BitWidth);
189       return SimplifyDemandedBits(Op, Demanded);
190     }
191 
192     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
193 
194     bool CombineToPreIndexedLoadStore(SDNode *N);
195     bool CombineToPostIndexedLoadStore(SDNode *N);
196     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
197     bool SliceUpLoad(SDNode *N);
198 
199     /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
200     ///   load.
201     ///
202     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
203     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
204     /// \param EltNo index of the vector element to load.
205     /// \param OriginalLoad load that EVE came from to be replaced.
206     /// \returns EVE on success SDValue() on failure.
207     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
208         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
209     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
210     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
211     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
212     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
213     SDValue PromoteIntBinOp(SDValue Op);
214     SDValue PromoteIntShiftOp(SDValue Op);
215     SDValue PromoteExtend(SDValue Op);
216     bool PromoteLoad(SDValue Op);
217 
218     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
219                          SDValue ExtLoad, const SDLoc &DL,
220                          ISD::NodeType ExtType);
221 
222     /// Call the node-specific routine that knows how to fold each
223     /// particular type of node. If that doesn't do anything, try the
224     /// target-specific DAG combines.
225     SDValue combine(SDNode *N);
226 
227     // Visitation implementation - Implement dag node combining for different
228     // node types.  The semantics are as follows:
229     // Return Value:
230     //   SDValue.getNode() == 0 - No change was made
231     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
232     //   otherwise              - N should be replaced by the returned Operand.
233     //
234     SDValue visitTokenFactor(SDNode *N);
235     SDValue visitMERGE_VALUES(SDNode *N);
236     SDValue visitADD(SDNode *N);
237     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
238     SDValue visitSUB(SDNode *N);
239     SDValue visitADDC(SDNode *N);
240     SDValue visitUADDO(SDNode *N);
241     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
242     SDValue visitSUBC(SDNode *N);
243     SDValue visitUSUBO(SDNode *N);
244     SDValue visitADDE(SDNode *N);
245     SDValue visitADDCARRY(SDNode *N);
246     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
247     SDValue visitSUBE(SDNode *N);
248     SDValue visitSUBCARRY(SDNode *N);
249     SDValue visitMUL(SDNode *N);
250     SDValue useDivRem(SDNode *N);
251     SDValue visitSDIV(SDNode *N);
252     SDValue visitUDIV(SDNode *N);
253     SDValue visitREM(SDNode *N);
254     SDValue visitMULHU(SDNode *N);
255     SDValue visitMULHS(SDNode *N);
256     SDValue visitSMUL_LOHI(SDNode *N);
257     SDValue visitUMUL_LOHI(SDNode *N);
258     SDValue visitSMULO(SDNode *N);
259     SDValue visitUMULO(SDNode *N);
260     SDValue visitIMINMAX(SDNode *N);
261     SDValue visitAND(SDNode *N);
262     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
263     SDValue visitOR(SDNode *N);
264     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
265     SDValue visitXOR(SDNode *N);
266     SDValue SimplifyVBinOp(SDNode *N);
267     SDValue visitSHL(SDNode *N);
268     SDValue visitSRA(SDNode *N);
269     SDValue visitSRL(SDNode *N);
270     SDValue visitRotate(SDNode *N);
271     SDValue visitABS(SDNode *N);
272     SDValue visitBSWAP(SDNode *N);
273     SDValue visitBITREVERSE(SDNode *N);
274     SDValue visitCTLZ(SDNode *N);
275     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
276     SDValue visitCTTZ(SDNode *N);
277     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
278     SDValue visitCTPOP(SDNode *N);
279     SDValue visitSELECT(SDNode *N);
280     SDValue visitVSELECT(SDNode *N);
281     SDValue visitSELECT_CC(SDNode *N);
282     SDValue visitSETCC(SDNode *N);
283     SDValue visitSETCCE(SDNode *N);
284     SDValue visitSETCCCARRY(SDNode *N);
285     SDValue visitSIGN_EXTEND(SDNode *N);
286     SDValue visitZERO_EXTEND(SDNode *N);
287     SDValue visitANY_EXTEND(SDNode *N);
288     SDValue visitAssertZext(SDNode *N);
289     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
290     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
291     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
292     SDValue visitTRUNCATE(SDNode *N);
293     SDValue visitBITCAST(SDNode *N);
294     SDValue visitBUILD_PAIR(SDNode *N);
295     SDValue visitFADD(SDNode *N);
296     SDValue visitFSUB(SDNode *N);
297     SDValue visitFMUL(SDNode *N);
298     SDValue visitFMA(SDNode *N);
299     SDValue visitFDIV(SDNode *N);
300     SDValue visitFREM(SDNode *N);
301     SDValue visitFSQRT(SDNode *N);
302     SDValue visitFCOPYSIGN(SDNode *N);
303     SDValue visitSINT_TO_FP(SDNode *N);
304     SDValue visitUINT_TO_FP(SDNode *N);
305     SDValue visitFP_TO_SINT(SDNode *N);
306     SDValue visitFP_TO_UINT(SDNode *N);
307     SDValue visitFP_ROUND(SDNode *N);
308     SDValue visitFP_ROUND_INREG(SDNode *N);
309     SDValue visitFP_EXTEND(SDNode *N);
310     SDValue visitFNEG(SDNode *N);
311     SDValue visitFABS(SDNode *N);
312     SDValue visitFCEIL(SDNode *N);
313     SDValue visitFTRUNC(SDNode *N);
314     SDValue visitFFLOOR(SDNode *N);
315     SDValue visitFMINNUM(SDNode *N);
316     SDValue visitFMAXNUM(SDNode *N);
317     SDValue visitBRCOND(SDNode *N);
318     SDValue visitBR_CC(SDNode *N);
319     SDValue visitLOAD(SDNode *N);
320 
321     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
322     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
323 
324     SDValue visitSTORE(SDNode *N);
325     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
326     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
327     SDValue visitBUILD_VECTOR(SDNode *N);
328     SDValue visitCONCAT_VECTORS(SDNode *N);
329     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
330     SDValue visitVECTOR_SHUFFLE(SDNode *N);
331     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
332     SDValue visitINSERT_SUBVECTOR(SDNode *N);
333     SDValue visitMLOAD(SDNode *N);
334     SDValue visitMSTORE(SDNode *N);
335     SDValue visitMGATHER(SDNode *N);
336     SDValue visitMSCATTER(SDNode *N);
337     SDValue visitFP_TO_FP16(SDNode *N);
338     SDValue visitFP16_TO_FP(SDNode *N);
339 
340     SDValue visitFADDForFMACombine(SDNode *N);
341     SDValue visitFSUBForFMACombine(SDNode *N);
342     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
343 
344     SDValue XformToShuffleWithZero(SDNode *N);
345     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
346                            SDValue RHS);
347 
348     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
349 
350     SDValue foldSelectOfConstants(SDNode *N);
351     SDValue foldBinOpIntoSelect(SDNode *BO);
352     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
353     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
354     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
355     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
356                              SDValue N2, SDValue N3, ISD::CondCode CC,
357                              bool NotExtCompare = false);
358     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
359                                    SDValue N2, SDValue N3, ISD::CondCode CC);
360     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
361                               const SDLoc &DL);
362     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
363                           const SDLoc &DL, bool foldBooleans = true);
364 
365     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
366                            SDValue &CC) const;
367     bool isOneUseSetCC(SDValue N) const;
368 
369     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
370                                          unsigned HiOp);
371     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
372     SDValue CombineExtLoad(SDNode *N);
373     SDValue combineRepeatedFPDivisors(SDNode *N);
374     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
375     SDValue BuildSDIV(SDNode *N);
376     SDValue BuildSDIVPow2(SDNode *N);
377     SDValue BuildUDIV(SDNode *N);
378     SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
379     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
380     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
381     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
382     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
383     SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
384                                 SDNodeFlags Flags, bool Reciprocal);
385     SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
386                                 SDNodeFlags Flags, bool Reciprocal);
387     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
388                                bool DemandHighBits = true);
389     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
390     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
391                               SDValue InnerPos, SDValue InnerNeg,
392                               unsigned PosOpcode, unsigned NegOpcode,
393                               const SDLoc &DL);
394     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
395     SDValue MatchLoadCombine(SDNode *N);
396     SDValue ReduceLoadWidth(SDNode *N);
397     SDValue ReduceLoadOpStoreWidth(SDNode *N);
398     SDValue splitMergedValStore(StoreSDNode *ST);
399     SDValue TransformFPLoadStorePair(SDNode *N);
400     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
401     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
402     SDValue reduceBuildVecToShuffle(SDNode *N);
403     SDValue reduceBuildVecToTrunc(SDNode *N);
404     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
405                                   ArrayRef<int> VectorMask, SDValue VecIn1,
406                                   SDValue VecIn2, unsigned LeftIdx);
407     SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
408 
409     /// Walk up chain skipping non-aliasing memory nodes,
410     /// looking for aliasing nodes and adding them to the Aliases vector.
411     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
412                           SmallVectorImpl<SDValue> &Aliases);
413 
414     /// Return true if there is any possibility that the two addresses overlap.
415     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
416 
417     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
418     /// chain (aliasing node.)
419     SDValue FindBetterChain(SDNode *N, SDValue Chain);
420 
421     /// Try to replace a store and any possibly adjacent stores on
422     /// consecutive chains with better chains. Return true only if St is
423     /// replaced.
424     ///
425     /// Notice that other chains may still be replaced even if the function
426     /// returns false.
427     bool findBetterNeighborChains(StoreSDNode *St);
428 
429     /// Match "(X shl/srl V1) & V2" where V2 may not be present.
430     bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
431 
432     /// Holds a pointer to an LSBaseSDNode as well as information on where it
433     /// is located in a sequence of memory operations connected by a chain.
434     struct MemOpLink {
435       MemOpLink(LSBaseSDNode *N, int64_t Offset)
436           : MemNode(N), OffsetFromBase(Offset) {}
437       // Ptr to the mem node.
438       LSBaseSDNode *MemNode;
439       // Offset from the base ptr.
440       int64_t OffsetFromBase;
441     };
442 
443     /// This is a helper function for visitMUL to check the profitability
444     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
445     /// MulNode is the original multiply, AddNode is (add x, c1),
446     /// and ConstNode is c2.
447     bool isMulAddWithConstProfitable(SDNode *MulNode,
448                                      SDValue &AddNode,
449                                      SDValue &ConstNode);
450 
451 
452     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
453     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
454     /// the type of the loaded value to be extended.  LoadedVT returns the type
455     /// of the original loaded value.  NarrowLoad returns whether the load would
456     /// need to be narrowed in order to match.
457     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
458                           EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
459                           bool &NarrowLoad);
460 
461     /// Helper function for MergeConsecutiveStores which merges the
462     /// component store chains.
463     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
464                                 unsigned NumStores);
465 
466     /// This is a helper function for MergeConsecutiveStores. When the source
467     /// elements of the consecutive stores are all constants or all extracted
468     /// vector elements, try to merge them into one larger store.
469     /// \return True if a merged store was created.
470     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
471                                          EVT MemVT, unsigned NumStores,
472                                          bool IsConstantSrc, bool UseVector,
473                                          bool UseTrunc);
474 
475     /// This is a helper function for MergeConsecutiveStores.
476     /// Stores that may be merged are placed in StoreNodes.
477     void getStoreMergeCandidates(StoreSDNode *St,
478                                  SmallVectorImpl<MemOpLink> &StoreNodes);
479 
480     /// Helper function for MergeConsecutiveStores. Checks if
481     /// Candidate stores have indirect dependency through their
482     /// operands. \return True if safe to merge
483     bool checkMergeStoreCandidatesForDependencies(
484         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
485 
    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged.
490     bool MergeConsecutiveStores(StoreSDNode *N);
491 
492     /// \brief Try to transform a truncation where C is a constant:
493     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
494     ///
495     /// \p N needs to be a truncation and its first operand an AND. Other
496     /// requirements are checked by the function (e.g. that trunc is
497     /// single-use) and if missed an empty SDValue is returned.
498     SDValue distributeTruncateThroughAnd(SDNode *N);
499 
500   public:
501     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
502         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
503           OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
504       ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
505 
506       MaximumLegalStoreInBits = 0;
507       for (MVT VT : MVT::all_valuetypes())
508         if (EVT(VT).isSimple() && VT != MVT::Other &&
509             TLI.isTypeLegal(EVT(VT)) &&
510             VT.getSizeInBits() >= MaximumLegalStoreInBits)
511           MaximumLegalStoreInBits = VT.getSizeInBits();
512     }
513 
514     /// Runs the dag combiner on all nodes in the work list
515     void Run(CombineLevel AtLevel);
516 
517     SelectionDAG &getDAG() const { return DAG; }
518 
519     /// Returns a type large enough to hold any valid shift amount - before type
520     /// legalization these can be huge.
521     EVT getShiftAmountTy(EVT LHSTy) {
522       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
523       if (LHSTy.isVector())
524         return LHSTy;
525       auto &DL = DAG.getDataLayout();
526       return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
527                         : TLI.getPointerTy(DL);
528     }
529 
530     /// This method returns true if we are running before type legalization or
531     /// if the specified VT is legal.
532     bool isTypeLegal(const EVT &VT) {
533       if (!LegalTypes) return true;
534       return TLI.isTypeLegal(VT);
535     }
536 
537     /// Convenience wrapper around TargetLowering::getSetCCResultType
538     EVT getSetCCResultType(EVT VT) const {
539       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
540     }
541   };
542 }
543 
544 
545 namespace {
546 /// This class is a DAGUpdateListener that removes any deleted
547 /// nodes from the worklist.
548 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
549   DAGCombiner &DC;
550 public:
551   explicit WorklistRemover(DAGCombiner &dc)
552     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
553 
554   void NodeDeleted(SDNode *N, SDNode *E) override {
555     DC.removeFromWorklist(N);
556   }
557 };
558 }
559 
560 //===----------------------------------------------------------------------===//
561 //  TargetLowering::DAGCombinerInfo implementation
562 //===----------------------------------------------------------------------===//
563 
564 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
565   ((DAGCombiner*)DC)->AddToWorklist(N);
566 }
567 
568 SDValue TargetLowering::DAGCombinerInfo::
569 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
570   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
571 }
572 
573 SDValue TargetLowering::DAGCombinerInfo::
574 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
575   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
576 }
577 
578 
579 SDValue TargetLowering::DAGCombinerInfo::
580 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
581   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
582 }
583 
584 void TargetLowering::DAGCombinerInfo::
585 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
586   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
587 }
588 
589 //===----------------------------------------------------------------------===//
590 // Helper Functions
591 //===----------------------------------------------------------------------===//
592 
593 void DAGCombiner::deleteAndRecombine(SDNode *N) {
594   removeFromWorklist(N);
595 
596   // If the operands of this node are only used by the node, they will now be
597   // dead. Make sure to re-visit them and recursively delete dead nodes.
598   for (const SDValue &Op : N->ops())
599     // For an operand generating multiple values, one of the values may
600     // become dead allowing further simplification (e.g. split index
601     // arithmetic from an indexed load).
602     if (Op->hasOneUse() || Op->getNumValues() > 1)
603       AddToWorklist(Op.getNode());
604 
605   DAG.DeleteNode(N);
606 }
607 
608 /// Return 1 if we can compute the negated form of the specified expression for
609 /// the same cost as the expression itself, or 2 if we can compute the negated
610 /// form more cheaply than the expression itself.
611 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
612                                const TargetLowering &TLI,
613                                const TargetOptions *Options,
614                                unsigned Depth = 0) {
615   // fneg is removable even if it has multiple uses.
616   if (Op.getOpcode() == ISD::FNEG) return 2;
617 
618   // Don't allow anything with multiple uses.
619   if (!Op.hasOneUse()) return 0;
620 
621   // Don't recurse exponentially.
622   if (Depth > 6) return 0;
623 
624   switch (Op.getOpcode()) {
625   default: return false;
626   case ISD::ConstantFP: {
627     if (!LegalOperations)
628       return 1;
629 
630     // Don't invert constant FP values after legalization unless the target says
631     // the negated constant is legal.
632     EVT VT = Op.getValueType();
633     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
634       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
635   }
636   case ISD::FADD:
637     // FIXME: determine better conditions for this xform.
638     if (!Options->UnsafeFPMath) return 0;
639 
640     // After operation legalization, it might not be legal to create new FSUBs.
641     if (LegalOperations &&
642         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
643       return 0;
644 
645     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
646     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
647                                     Options, Depth + 1))
648       return V;
649     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
650     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
651                               Depth + 1);
652   case ISD::FSUB:
653     // We can't turn -(A-B) into B-A when we honor signed zeros.
654     if (!Options->NoSignedZerosFPMath &&
655         !Op.getNode()->getFlags().hasNoSignedZeros())
656       return 0;
657 
658     // fold (fneg (fsub A, B)) -> (fsub B, A)
659     return 1;
660 
661   case ISD::FMUL:
662   case ISD::FDIV:
663     if (Options->HonorSignDependentRoundingFPMath()) return 0;
664 
665     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
666     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
667                                     Options, Depth + 1))
668       return V;
669 
670     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
671                               Depth + 1);
672 
673   case ISD::FP_EXTEND:
674   case ISD::FP_ROUND:
675   case ISD::FSIN:
676     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
677                               Depth + 1);
678   }
679 }
680 
681 /// If isNegatibleForFree returns true, return the newly negated expression.
682 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
683                                     bool LegalOperations, unsigned Depth = 0) {
684   const TargetOptions &Options = DAG.getTarget().Options;
685   // fneg is removable even if it has multiple uses.
686   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
687 
688   // Don't allow anything with multiple uses.
689   assert(Op.hasOneUse() && "Unknown reuse!");
690 
691   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
692 
693   const SDNodeFlags Flags = Op.getNode()->getFlags();
694 
695   switch (Op.getOpcode()) {
696   default: llvm_unreachable("Unknown code");
697   case ISD::ConstantFP: {
698     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
699     V.changeSign();
700     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
701   }
702   case ISD::FADD:
703     // FIXME: determine better conditions for this xform.
704     assert(Options.UnsafeFPMath);
705 
706     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
707     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
708                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
709       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
710                          GetNegatedExpression(Op.getOperand(0), DAG,
711                                               LegalOperations, Depth+1),
712                          Op.getOperand(1), Flags);
713     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
714     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
715                        GetNegatedExpression(Op.getOperand(1), DAG,
716                                             LegalOperations, Depth+1),
717                        Op.getOperand(0), Flags);
718   case ISD::FSUB:
719     // fold (fneg (fsub 0, B)) -> B
720     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
721       if (N0CFP->isZero())
722         return Op.getOperand(1);
723 
724     // fold (fneg (fsub A, B)) -> (fsub B, A)
725     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
726                        Op.getOperand(1), Op.getOperand(0), Flags);
727 
728   case ISD::FMUL:
729   case ISD::FDIV:
730     assert(!Options.HonorSignDependentRoundingFPMath());
731 
732     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
733     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
734                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
735       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
736                          GetNegatedExpression(Op.getOperand(0), DAG,
737                                               LegalOperations, Depth+1),
738                          Op.getOperand(1), Flags);
739 
740     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
741     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
742                        Op.getOperand(0),
743                        GetNegatedExpression(Op.getOperand(1), DAG,
744                                             LegalOperations, Depth+1), Flags);
745 
746   case ISD::FP_EXTEND:
747   case ISD::FSIN:
748     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
749                        GetNegatedExpression(Op.getOperand(0), DAG,
750                                             LegalOperations, Depth+1));
751   case ISD::FP_ROUND:
752       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
753                          GetNegatedExpression(Op.getOperand(0), DAG,
754                                               LegalOperations, Depth+1),
755                          Op.getOperand(1));
756   }
757 }
758 
759 // APInts must be the same size for most operations, this helper
760 // function zero extends the shorter of the pair so that they match.
761 // We provide an Offset so that we can create bitwidths that won't overflow.
762 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
763   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
764   LHS = LHS.zextOrSelf(Bits);
765   RHS = RHS.zextOrSelf(Bits);
766 }
767 
768 // Return true if this node is a setcc, or is a select_cc
769 // that selects between the target values used for true and false, making it
770 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
771 // the appropriate nodes based on the type of node we are checking. This
772 // simplifies life a bit for the callers.
773 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
774                                     SDValue &CC) const {
775   if (N.getOpcode() == ISD::SETCC) {
776     LHS = N.getOperand(0);
777     RHS = N.getOperand(1);
778     CC  = N.getOperand(2);
779     return true;
780   }
781 
782   if (N.getOpcode() != ISD::SELECT_CC ||
783       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
784       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
785     return false;
786 
787   if (TLI.getBooleanContents(N.getValueType()) ==
788       TargetLowering::UndefinedBooleanContent)
789     return false;
790 
791   LHS = N.getOperand(0);
792   RHS = N.getOperand(1);
793   CC  = N.getOperand(4);
794   return true;
795 }
796 
797 /// Return true if this is a SetCC-equivalent operation with only one use.
798 /// If this is true, it allows the users to invert the operation for free when
799 /// it is profitable to do so.
800 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
801   SDValue N0, N1, N2;
802   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
803     return true;
804   return false;
805 }
806 
807 // \brief Returns the SDNode if it is a constant float BuildVector
808 // or constant float.
809 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
810   if (isa<ConstantFPSDNode>(N))
811     return N.getNode();
812   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
813     return N.getNode();
814   return nullptr;
815 }
816 
817 // Determines if it is a constant integer or a build vector of constant
818 // integers (and undefs).
819 // Do not permit build vector implicit truncation.
820 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
821   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
822     return !(Const->isOpaque() && NoOpaques);
823   if (N.getOpcode() != ISD::BUILD_VECTOR)
824     return false;
825   unsigned BitWidth = N.getScalarValueSizeInBits();
826   for (const SDValue &Op : N->op_values()) {
827     if (Op.isUndef())
828       continue;
829     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
830     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
831         (Const->isOpaque() && NoOpaques))
832       return false;
833   }
834   return true;
835 }
836 
837 // Determines if it is a constant null integer or a splatted vector of a
838 // constant null integer (with no undefs).
839 // Build vector implicit truncation is not an issue for null values.
840 static bool isNullConstantOrNullSplatConstant(SDValue N) {
841   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
842     return Splat->isNullValue();
843   return false;
844 }
845 
846 // Determines if it is a constant integer of one or a splatted vector of a
847 // constant integer of one (with no undefs).
848 // Do not permit build vector implicit truncation.
849 static bool isOneConstantOrOneSplatConstant(SDValue N) {
850   unsigned BitWidth = N.getScalarValueSizeInBits();
851   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
852     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
853   return false;
854 }
855 
856 // Determines if it is a constant integer of all ones or a splatted vector of a
857 // constant integer of all ones (with no undefs).
858 // Do not permit build vector implicit truncation.
859 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
860   unsigned BitWidth = N.getScalarValueSizeInBits();
861   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
862     return Splat->isAllOnesValue() &&
863            Splat->getAPIntValue().getBitWidth() == BitWidth;
864   return false;
865 }
866 
867 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
868 // undef's.
869 static bool isAnyConstantBuildVector(const SDNode *N) {
870   return ISD::isBuildVectorOfConstantSDNodes(N) ||
871          ISD::isBuildVectorOfConstantFPSDNodes(N);
872 }
873 
874 // Attempt to match a unary predicate against a scalar/splat constant or
875 // every element of a constant BUILD_VECTOR.
876 static bool matchUnaryPredicate(SDValue Op,
877                                 std::function<bool(ConstantSDNode *)> Match) {
878   if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
879     return Match(Cst);
880 
881   if (ISD::BUILD_VECTOR != Op.getOpcode())
882     return false;
883 
884   EVT SVT = Op.getValueType().getScalarType();
885   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
886     auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
887     if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
888       return false;
889   }
890   return true;
891 }
892 
893 // Attempt to match a binary predicate against a pair of scalar/splat constants
894 // or every element of a pair of constant BUILD_VECTORs.
895 static bool matchBinaryPredicate(
896     SDValue LHS, SDValue RHS,
897     std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
898   if (LHS.getValueType() != RHS.getValueType())
899     return false;
900 
901   if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
902     if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
903       return Match(LHSCst, RHSCst);
904 
905   if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
906       ISD::BUILD_VECTOR != RHS.getOpcode())
907     return false;
908 
909   EVT SVT = LHS.getValueType().getScalarType();
910   for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
911     auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
912     auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
913     if (!LHSCst || !RHSCst)
914       return false;
915     if (LHSCst->getValueType(0) != SVT ||
916         LHSCst->getValueType(0) != RHSCst->getValueType(0))
917       return false;
918     if (!Match(LHSCst, RHSCst))
919       return false;
920   }
921   return true;
922 }
923 
924 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
925                                     SDValue N1) {
926   EVT VT = N0.getValueType();
927   if (N0.getOpcode() == Opc) {
928     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
929       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
930         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
931         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
932           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
933         return SDValue();
934       }
935       if (N0.hasOneUse()) {
936         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
937         // use
938         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
939         if (!OpNode.getNode())
940           return SDValue();
941         AddToWorklist(OpNode.getNode());
942         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
943       }
944     }
945   }
946 
947   if (N1.getOpcode() == Opc) {
948     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
949       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
950         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
951         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
952           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
953         return SDValue();
954       }
955       if (N1.hasOneUse()) {
956         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
957         // use
958         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
959         if (!OpNode.getNode())
960           return SDValue();
961         AddToWorklist(OpNode.getNode());
962         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
963       }
964     }
965   }
966 
967   return SDValue();
968 }
969 
970 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
971                                bool AddTo) {
972   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
973   ++NodesCombined;
974   DEBUG(dbgs() << "\nReplacing.1 ";
975         N->dump(&DAG);
976         dbgs() << "\nWith: ";
977         To[0].getNode()->dump(&DAG);
978         dbgs() << " and " << NumTo-1 << " other values\n");
979   for (unsigned i = 0, e = NumTo; i != e; ++i)
980     assert((!To[i].getNode() ||
981             N->getValueType(i) == To[i].getValueType()) &&
982            "Cannot combine value to value of different type!");
983 
984   WorklistRemover DeadNodes(*this);
985   DAG.ReplaceAllUsesWith(N, To);
986   if (AddTo) {
987     // Push the new nodes and any users onto the worklist
988     for (unsigned i = 0, e = NumTo; i != e; ++i) {
989       if (To[i].getNode()) {
990         AddToWorklist(To[i].getNode());
991         AddUsersToWorklist(To[i].getNode());
992       }
993     }
994   }
995 
996   // Finally, if the node is now dead, remove it from the graph.  The node
997   // may not be dead if the replacement process recursively simplified to
998   // something else needing this node.
999   if (N->use_empty())
1000     deleteAndRecombine(N);
1001   return SDValue(N, 0);
1002 }
1003 
1004 void DAGCombiner::
1005 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1006   // Replace all uses.  If any nodes become isomorphic to other nodes and
1007   // are deleted, make sure to remove them from our worklist.
1008   WorklistRemover DeadNodes(*this);
1009   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1010 
1011   // Push the new node and any (possibly new) users onto the worklist.
1012   AddToWorklist(TLO.New.getNode());
1013   AddUsersToWorklist(TLO.New.getNode());
1014 
1015   // Finally, if the node is now dead, remove it from the graph.  The node
1016   // may not be dead if the replacement process recursively simplified to
1017   // something else needing this node.
1018   if (TLO.Old.getNode()->use_empty())
1019     deleteAndRecombine(TLO.Old.getNode());
1020 }
1021 
1022 /// Check the specified integer node value to see if it can be simplified or if
1023 /// things it uses can be simplified by bit propagation. If so, return true.
1024 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1025   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1026   KnownBits Known;
1027   if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1028     return false;
1029 
1030   // Revisit the node.
1031   AddToWorklist(Op.getNode());
1032 
1033   // Replace the old value with the new one.
1034   ++NodesCombined;
1035   DEBUG(dbgs() << "\nReplacing.2 ";
1036         TLO.Old.getNode()->dump(&DAG);
1037         dbgs() << "\nWith: ";
1038         TLO.New.getNode()->dump(&DAG);
1039         dbgs() << '\n');
1040 
1041   CommitTargetLoweringOpt(TLO);
1042   return true;
1043 }
1044 
1045 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1046   SDLoc DL(Load);
1047   EVT VT = Load->getValueType(0);
1048   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1049 
1050   DEBUG(dbgs() << "\nReplacing.9 ";
1051         Load->dump(&DAG);
1052         dbgs() << "\nWith: ";
1053         Trunc.getNode()->dump(&DAG);
1054         dbgs() << '\n');
1055   WorklistRemover DeadNodes(*this);
1056   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1057   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1058   deleteAndRecombine(Load);
1059   AddToWorklist(Trunc.getNode());
1060 }
1061 
1062 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1063   Replace = false;
1064   SDLoc DL(Op);
1065   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1066     LoadSDNode *LD = cast<LoadSDNode>(Op);
1067     EVT MemVT = LD->getMemoryVT();
1068     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1069       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1070                                                        : ISD::EXTLOAD)
1071       : LD->getExtensionType();
1072     Replace = true;
1073     return DAG.getExtLoad(ExtType, DL, PVT,
1074                           LD->getChain(), LD->getBasePtr(),
1075                           MemVT, LD->getMemOperand());
1076   }
1077 
1078   unsigned Opc = Op.getOpcode();
1079   switch (Opc) {
1080   default: break;
1081   case ISD::AssertSext:
1082     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1083       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1084     break;
1085   case ISD::AssertZext:
1086     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1087       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1088     break;
1089   case ISD::Constant: {
1090     unsigned ExtOpc =
1091       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1092     return DAG.getNode(ExtOpc, DL, PVT, Op);
1093   }
1094   }
1095 
1096   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1097     return SDValue();
1098   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1099 }
1100 
1101 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1102   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1103     return SDValue();
1104   EVT OldVT = Op.getValueType();
1105   SDLoc DL(Op);
1106   bool Replace = false;
1107   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1108   if (!NewOp.getNode())
1109     return SDValue();
1110   AddToWorklist(NewOp.getNode());
1111 
1112   if (Replace)
1113     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1114   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1115                      DAG.getValueType(OldVT));
1116 }
1117 
1118 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1119   EVT OldVT = Op.getValueType();
1120   SDLoc DL(Op);
1121   bool Replace = false;
1122   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1123   if (!NewOp.getNode())
1124     return SDValue();
1125   AddToWorklist(NewOp.getNode());
1126 
1127   if (Replace)
1128     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1129   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1130 }
1131 
1132 /// Promote the specified integer binary operation if the target indicates it is
1133 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1134 /// i32 since i16 instructions are longer.
1135 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1136   if (!LegalOperations)
1137     return SDValue();
1138 
1139   EVT VT = Op.getValueType();
1140   if (VT.isVector() || !VT.isInteger())
1141     return SDValue();
1142 
1143   // If operation type is 'undesirable', e.g. i16 on x86, consider
1144   // promoting it.
1145   unsigned Opc = Op.getOpcode();
1146   if (TLI.isTypeDesirableForOp(Opc, VT))
1147     return SDValue();
1148 
1149   EVT PVT = VT;
1150   // Consult target whether it is a good idea to promote this operation and
1151   // what's the right type to promote it to.
1152   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1153     assert(PVT != VT && "Don't know what type to promote to!");
1154 
1155     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1156 
1157     bool Replace0 = false;
1158     SDValue N0 = Op.getOperand(0);
1159     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1160 
1161     bool Replace1 = false;
1162     SDValue N1 = Op.getOperand(1);
1163     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1164     SDLoc DL(Op);
1165 
1166     SDValue RV =
1167         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1168 
1169     // New replace instances of N0 and N1
1170     if (Replace0 && N0 && N0.getOpcode() != ISD::DELETED_NODE && NN0 &&
1171         NN0.getOpcode() != ISD::DELETED_NODE) {
1172       AddToWorklist(NN0.getNode());
1173       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1174     }
1175 
1176     if (Replace1 && N1 && N1.getOpcode() != ISD::DELETED_NODE && NN1 &&
1177         NN1.getOpcode() != ISD::DELETED_NODE) {
1178       AddToWorklist(NN1.getNode());
1179       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1180     }
1181 
1182     // Deal with Op being deleted.
1183     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1184       return RV;
1185   }
1186   return SDValue();
1187 }
1188 
1189 /// Promote the specified integer shift operation if the target indicates it is
1190 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1191 /// i32 since i16 instructions are longer.
1192 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1193   if (!LegalOperations)
1194     return SDValue();
1195 
1196   EVT VT = Op.getValueType();
1197   if (VT.isVector() || !VT.isInteger())
1198     return SDValue();
1199 
1200   // If operation type is 'undesirable', e.g. i16 on x86, consider
1201   // promoting it.
1202   unsigned Opc = Op.getOpcode();
1203   if (TLI.isTypeDesirableForOp(Opc, VT))
1204     return SDValue();
1205 
1206   EVT PVT = VT;
1207   // Consult target whether it is a good idea to promote this operation and
1208   // what's the right type to promote it to.
1209   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1210     assert(PVT != VT && "Don't know what type to promote to!");
1211 
1212     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1213 
1214     bool Replace = false;
1215     SDValue N0 = Op.getOperand(0);
1216     SDValue N1 = Op.getOperand(1);
1217     if (Opc == ISD::SRA)
1218       N0 = SExtPromoteOperand(N0, PVT);
1219     else if (Opc == ISD::SRL)
1220       N0 = ZExtPromoteOperand(N0, PVT);
1221     else
1222       N0 = PromoteOperand(N0, PVT, Replace);
1223 
1224     if (!N0.getNode())
1225       return SDValue();
1226 
1227     SDLoc DL(Op);
1228     SDValue RV =
1229         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1230 
1231     AddToWorklist(N0.getNode());
1232     if (Replace)
1233       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1234 
1235     // Deal with Op being deleted.
1236     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1237       return RV;
1238   }
1239   return SDValue();
1240 }
1241 
1242 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1243   if (!LegalOperations)
1244     return SDValue();
1245 
1246   EVT VT = Op.getValueType();
1247   if (VT.isVector() || !VT.isInteger())
1248     return SDValue();
1249 
1250   // If operation type is 'undesirable', e.g. i16 on x86, consider
1251   // promoting it.
1252   unsigned Opc = Op.getOpcode();
1253   if (TLI.isTypeDesirableForOp(Opc, VT))
1254     return SDValue();
1255 
1256   EVT PVT = VT;
1257   // Consult target whether it is a good idea to promote this operation and
1258   // what's the right type to promote it to.
1259   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1260     assert(PVT != VT && "Don't know what type to promote to!");
1261     // fold (aext (aext x)) -> (aext x)
1262     // fold (aext (zext x)) -> (zext x)
1263     // fold (aext (sext x)) -> (sext x)
1264     DEBUG(dbgs() << "\nPromoting ";
1265           Op.getNode()->dump(&DAG));
1266     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1267   }
1268   return SDValue();
1269 }
1270 
1271 bool DAGCombiner::PromoteLoad(SDValue Op) {
1272   if (!LegalOperations)
1273     return false;
1274 
1275   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1276     return false;
1277 
1278   EVT VT = Op.getValueType();
1279   if (VT.isVector() || !VT.isInteger())
1280     return false;
1281 
1282   // If operation type is 'undesirable', e.g. i16 on x86, consider
1283   // promoting it.
1284   unsigned Opc = Op.getOpcode();
1285   if (TLI.isTypeDesirableForOp(Opc, VT))
1286     return false;
1287 
1288   EVT PVT = VT;
1289   // Consult target whether it is a good idea to promote this operation and
1290   // what's the right type to promote it to.
1291   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1292     assert(PVT != VT && "Don't know what type to promote to!");
1293 
1294     SDLoc DL(Op);
1295     SDNode *N = Op.getNode();
1296     LoadSDNode *LD = cast<LoadSDNode>(N);
1297     EVT MemVT = LD->getMemoryVT();
1298     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1299       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1300                                                        : ISD::EXTLOAD)
1301       : LD->getExtensionType();
1302     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1303                                    LD->getChain(), LD->getBasePtr(),
1304                                    MemVT, LD->getMemOperand());
1305     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1306 
1307     DEBUG(dbgs() << "\nPromoting ";
1308           N->dump(&DAG);
1309           dbgs() << "\nTo: ";
1310           Result.getNode()->dump(&DAG);
1311           dbgs() << '\n');
1312     WorklistRemover DeadNodes(*this);
1313     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1314     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1315     deleteAndRecombine(N);
1316     AddToWorklist(Result.getNode());
1317     return true;
1318   }
1319   return false;
1320 }
1321 
1322 /// \brief Recursively delete a node which has no uses and any operands for
1323 /// which it is the only use.
1324 ///
1325 /// Note that this both deletes the nodes and removes them from the worklist.
1326 /// It also adds any nodes who have had a user deleted to the worklist as they
1327 /// may now have only one use and subject to other combines.
1328 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1329   if (!N->use_empty())
1330     return false;
1331 
1332   SmallSetVector<SDNode *, 16> Nodes;
1333   Nodes.insert(N);
1334   do {
1335     N = Nodes.pop_back_val();
1336     if (!N)
1337       continue;
1338 
1339     if (N->use_empty()) {
1340       for (const SDValue &ChildN : N->op_values())
1341         Nodes.insert(ChildN.getNode());
1342 
1343       removeFromWorklist(N);
1344       DAG.DeleteNode(N);
1345     } else {
1346       AddToWorklist(N);
1347     }
1348   } while (!Nodes.empty());
1349   return true;
1350 }
1351 
1352 //===----------------------------------------------------------------------===//
1353 //  Main DAG Combiner implementation
1354 //===----------------------------------------------------------------------===//
1355 
/// Main driver of the combiner: seed the worklist with every node of the DAG
/// and repeatedly pop a node, delete it if dead, optionally re-legalize it,
/// and try to combine it, until the worklist is empty. \p AtLevel records how
/// far legalization has progressed and determines the LegalOperations and
/// LegalTypes flags consulted by the visit routines.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  // WorklistMap (not Worklist) is the emptiness check because Worklist may
  // still hold null placeholder entries.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    // Every non-null worklist entry must have a matching map entry; erase it
    // now that the node has been popped.
    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    // Keeps the worklist in sync with nodes deleted during this iteration.
    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Anything legalization touched, and its users, needs another look.
      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // N is no longer usable after legalization; move on to the next entry.
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // A null result means no combine applied to N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    // With matching result counts the whole node is replaced; otherwise N must
    // be single-valued and only that one value is replaced by RV.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1460 
1461 SDValue DAGCombiner::visit(SDNode *N) {
1462   switch (N->getOpcode()) {
1463   default: break;
1464   case ISD::TokenFactor:        return visitTokenFactor(N);
1465   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1466   case ISD::ADD:                return visitADD(N);
1467   case ISD::SUB:                return visitSUB(N);
1468   case ISD::ADDC:               return visitADDC(N);
1469   case ISD::UADDO:              return visitUADDO(N);
1470   case ISD::SUBC:               return visitSUBC(N);
1471   case ISD::USUBO:              return visitUSUBO(N);
1472   case ISD::ADDE:               return visitADDE(N);
1473   case ISD::ADDCARRY:           return visitADDCARRY(N);
1474   case ISD::SUBE:               return visitSUBE(N);
1475   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1476   case ISD::MUL:                return visitMUL(N);
1477   case ISD::SDIV:               return visitSDIV(N);
1478   case ISD::UDIV:               return visitUDIV(N);
1479   case ISD::SREM:
1480   case ISD::UREM:               return visitREM(N);
1481   case ISD::MULHU:              return visitMULHU(N);
1482   case ISD::MULHS:              return visitMULHS(N);
1483   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1484   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1485   case ISD::SMULO:              return visitSMULO(N);
1486   case ISD::UMULO:              return visitUMULO(N);
1487   case ISD::SMIN:
1488   case ISD::SMAX:
1489   case ISD::UMIN:
1490   case ISD::UMAX:               return visitIMINMAX(N);
1491   case ISD::AND:                return visitAND(N);
1492   case ISD::OR:                 return visitOR(N);
1493   case ISD::XOR:                return visitXOR(N);
1494   case ISD::SHL:                return visitSHL(N);
1495   case ISD::SRA:                return visitSRA(N);
1496   case ISD::SRL:                return visitSRL(N);
1497   case ISD::ROTR:
1498   case ISD::ROTL:               return visitRotate(N);
1499   case ISD::ABS:                return visitABS(N);
1500   case ISD::BSWAP:              return visitBSWAP(N);
1501   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1502   case ISD::CTLZ:               return visitCTLZ(N);
1503   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1504   case ISD::CTTZ:               return visitCTTZ(N);
1505   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1506   case ISD::CTPOP:              return visitCTPOP(N);
1507   case ISD::SELECT:             return visitSELECT(N);
1508   case ISD::VSELECT:            return visitVSELECT(N);
1509   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1510   case ISD::SETCC:              return visitSETCC(N);
1511   case ISD::SETCCE:             return visitSETCCE(N);
1512   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1513   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1514   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1515   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1516   case ISD::AssertZext:         return visitAssertZext(N);
1517   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1518   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1519   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1520   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1521   case ISD::BITCAST:            return visitBITCAST(N);
1522   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1523   case ISD::FADD:               return visitFADD(N);
1524   case ISD::FSUB:               return visitFSUB(N);
1525   case ISD::FMUL:               return visitFMUL(N);
1526   case ISD::FMA:                return visitFMA(N);
1527   case ISD::FDIV:               return visitFDIV(N);
1528   case ISD::FREM:               return visitFREM(N);
1529   case ISD::FSQRT:              return visitFSQRT(N);
1530   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1531   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1532   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1533   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1534   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1535   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1536   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1537   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1538   case ISD::FNEG:               return visitFNEG(N);
1539   case ISD::FABS:               return visitFABS(N);
1540   case ISD::FFLOOR:             return visitFFLOOR(N);
1541   case ISD::FMINNUM:            return visitFMINNUM(N);
1542   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1543   case ISD::FCEIL:              return visitFCEIL(N);
1544   case ISD::FTRUNC:             return visitFTRUNC(N);
1545   case ISD::BRCOND:             return visitBRCOND(N);
1546   case ISD::BR_CC:              return visitBR_CC(N);
1547   case ISD::LOAD:               return visitLOAD(N);
1548   case ISD::STORE:              return visitSTORE(N);
1549   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1550   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1551   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1552   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1553   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1554   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1555   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1556   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1557   case ISD::MGATHER:            return visitMGATHER(N);
1558   case ISD::MLOAD:              return visitMLOAD(N);
1559   case ISD::MSCATTER:           return visitMSCATTER(N);
1560   case ISD::MSTORE:             return visitMSTORE(N);
1561   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1562   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1563   }
1564   return SDValue();
1565 }
1566 
1567 SDValue DAGCombiner::combine(SDNode *N) {
1568   SDValue RV = visit(N);
1569 
1570   // If nothing happened, try a target-specific DAG combine.
1571   if (!RV.getNode()) {
1572     assert(N->getOpcode() != ISD::DELETED_NODE &&
1573            "Node was deleted but visit returned NULL!");
1574 
1575     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1576         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1577 
1578       // Expose the DAG combiner to the target combiner impls.
1579       TargetLowering::DAGCombinerInfo
1580         DagCombineInfo(DAG, Level, false, this);
1581 
1582       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1583     }
1584   }
1585 
1586   // If nothing happened still, try promoting the operation.
1587   if (!RV.getNode()) {
1588     switch (N->getOpcode()) {
1589     default: break;
1590     case ISD::ADD:
1591     case ISD::SUB:
1592     case ISD::MUL:
1593     case ISD::AND:
1594     case ISD::OR:
1595     case ISD::XOR:
1596       RV = PromoteIntBinOp(SDValue(N, 0));
1597       break;
1598     case ISD::SHL:
1599     case ISD::SRA:
1600     case ISD::SRL:
1601       RV = PromoteIntShiftOp(SDValue(N, 0));
1602       break;
1603     case ISD::SIGN_EXTEND:
1604     case ISD::ZERO_EXTEND:
1605     case ISD::ANY_EXTEND:
1606       RV = PromoteExtend(SDValue(N, 0));
1607       break;
1608     case ISD::LOAD:
1609       if (PromoteLoad(SDValue(N, 0)))
1610         RV = SDValue(N, 0);
1611       break;
1612     }
1613   }
1614 
1615   // If N is a commutative binary node, try commuting it to enable more
1616   // sdisel CSE.
1617   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1618       N->getNumValues() == 1) {
1619     SDValue N0 = N->getOperand(0);
1620     SDValue N1 = N->getOperand(1);
1621 
1622     // Constant operands are canonicalized to RHS.
1623     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1624       SDValue Ops[] = {N1, N0};
1625       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1626                                             N->getFlags());
1627       if (CSENode)
1628         return SDValue(CSENode, 0);
1629     }
1630   }
1631 
1632   return RV;
1633 }
1634 
1635 /// Given a node, return its input chain if it has one, otherwise return a null
1636 /// sd operand.
1637 static SDValue getInputChainForNode(SDNode *N) {
1638   if (unsigned NumOps = N->getNumOperands()) {
1639     if (N->getOperand(0).getValueType() == MVT::Other)
1640       return N->getOperand(0);
1641     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1642       return N->getOperand(NumOps-1);
1643     for (unsigned i = 1; i < NumOps-1; ++i)
1644       if (N->getOperand(i).getValueType() == MVT::Other)
1645         return N->getOperand(i);
1646   }
1647   return SDValue();
1648 }
1649 
/// Simplify the TokenFactor \p N in three steps:
///  1. With exactly two operands, if the input chain of one operand is the
///     other operand, return the former directly.
///  2. Flatten single-use nested TokenFactor operands into one operand list,
///     dropping EntryToken operands and duplicates.
///  3. Prune operands that are reached while walking up the chain of another
///     operand.
/// \returns the replacement value (the entry node if no operand survives), or
/// a null SDValue when nothing changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps; // Nodes already present in Ops.
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        // Only a nested token factor with a single use, and not already
        // queued, is merged into this one; any other falls through and is
        // kept as an ordinary operand.
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains; // Nodes reached while walking chains.
  bool DidPruneOps = false;

  // Seed the search with one worklist entry per operand; the pair's second
  // member is the operand's index in Ops.
  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // NOTE: from here on this lambda shadows the one-argument AddToWorklist that
  // was called in the flattening loop above.
  // CurIdx is the worklist position being processed, Op the chain node just
  // reached, and OpNumber the index of the operand whose search reached it.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      // Linear scan for the index of Op within Ops.
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      // The reached operand's outstanding work now belongs to OpNumber.
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // The walk stops after at most 1024 worklist entries have been processed.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to be considering at least 2 Ops to prune.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      // Every operand of a token factor is followed.
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // For these nodes the walk continues through operand 0.
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      // Memory nodes are followed through getChain(); any other node ends
      // this branch of the search.
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    // This worklist entry is done; an operand with no entries left no longer
    // counts towards the operands still under consideration.
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the operands that were not reached during the chain walk.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1808 
1809 /// MERGE_VALUES can always be eliminated.
1810 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1811   WorklistRemover DeadNodes(*this);
1812   // Replacing results may cause a different MERGE_VALUES to suddenly
1813   // be CSE'd with N, and carry its uses with it. Iterate until no
1814   // uses remain, to ensure that the node can be safely deleted.
1815   // First add the users of this node to the work list so that they
1816   // can be tried again once they have new operands.
1817   AddUsersToWorklist(N);
1818   do {
1819     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1820       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1821   } while (!N->use_empty());
1822   deleteAndRecombine(N);
1823   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1824 }
1825 
1826 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1827 /// ConstantSDNode pointer else nullptr.
1828 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1829   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1830   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1831 }
1832 
1833 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1834   auto BinOpcode = BO->getOpcode();
1835   assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1836           BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1837           BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1838           BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1839           BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1840           BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1841           BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1842           BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1843           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1844          "Unexpected binary operator");
1845 
1846   // Bail out if any constants are opaque because we can't constant fold those.
1847   SDValue C1 = BO->getOperand(1);
1848   if (!isConstantOrConstantVector(C1, true) &&
1849       !isConstantFPBuildVectorOrConstantFP(C1))
1850     return SDValue();
1851 
1852   // Don't do this unless the old select is going away. We want to eliminate the
1853   // binary operator, not replace a binop with a select.
1854   // TODO: Handle ISD::SELECT_CC.
1855   SDValue Sel = BO->getOperand(0);
1856   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1857     return SDValue();
1858 
1859   SDValue CT = Sel.getOperand(1);
1860   if (!isConstantOrConstantVector(CT, true) &&
1861       !isConstantFPBuildVectorOrConstantFP(CT))
1862     return SDValue();
1863 
1864   SDValue CF = Sel.getOperand(2);
1865   if (!isConstantOrConstantVector(CF, true) &&
1866       !isConstantFPBuildVectorOrConstantFP(CF))
1867     return SDValue();
1868 
1869   // We have a select-of-constants followed by a binary operator with a
1870   // constant. Eliminate the binop by pulling the constant math into the select.
1871   // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1872   EVT VT = Sel.getValueType();
1873   SDLoc DL(Sel);
1874   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1875   assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
1876           isConstantFPBuildVectorOrConstantFP(NewCT)) &&
1877          "Failed to constant fold a binop with constant operands");
1878 
1879   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1880   assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
1881           isConstantFPBuildVectorOrConstantFP(NewCF)) &&
1882          "Failed to constant fold a binop with constant operands");
1883 
1884   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1885 }
1886 
1887 SDValue DAGCombiner::visitADD(SDNode *N) {
1888   SDValue N0 = N->getOperand(0);
1889   SDValue N1 = N->getOperand(1);
1890   EVT VT = N0.getValueType();
1891   SDLoc DL(N);
1892 
1893   // fold vector ops
1894   if (VT.isVector()) {
1895     if (SDValue FoldedVOp = SimplifyVBinOp(N))
1896       return FoldedVOp;
1897 
1898     // fold (add x, 0) -> x, vector edition
1899     if (ISD::isBuildVectorAllZeros(N1.getNode()))
1900       return N0;
1901     if (ISD::isBuildVectorAllZeros(N0.getNode()))
1902       return N1;
1903   }
1904 
1905   // fold (add x, undef) -> undef
1906   if (N0.isUndef())
1907     return N0;
1908 
1909   if (N1.isUndef())
1910     return N1;
1911 
1912   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
1913     // canonicalize constant to RHS
1914     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
1915       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
1916     // fold (add c1, c2) -> c1+c2
1917     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
1918                                       N1.getNode());
1919   }
1920 
1921   // fold (add x, 0) -> x
1922   if (isNullConstant(N1))
1923     return N0;
1924 
1925   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
1926     // fold ((c1-A)+c2) -> (c1+c2)-A
1927     if (N0.getOpcode() == ISD::SUB &&
1928         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
1929       // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
1930       return DAG.getNode(ISD::SUB, DL, VT,
1931                          DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
1932                          N0.getOperand(1));
1933     }
1934 
1935     // add (sext i1 X), 1 -> zext (not i1 X)
1936     // We don't transform this pattern:
1937     //   add (zext i1 X), -1 -> sext (not i1 X)
1938     // because most (?) targets generate better code for the zext form.
1939     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
1940         isOneConstantOrOneSplatConstant(N1)) {
1941       SDValue X = N0.getOperand(0);
1942       if ((!LegalOperations ||
1943            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
1944             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
1945           X.getScalarValueSizeInBits() == 1) {
1946         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
1947         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
1948       }
1949     }
1950   }
1951 
1952   if (SDValue NewSel = foldBinOpIntoSelect(N))
1953     return NewSel;
1954 
1955   // reassociate add
1956   if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
1957     return RADD;
1958 
1959   // fold ((0-A) + B) -> B-A
1960   if (N0.getOpcode() == ISD::SUB &&
1961       isNullConstantOrNullSplatConstant(N0.getOperand(0)))
1962     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
1963 
1964   // fold (A + (0-B)) -> A-B
1965   if (N1.getOpcode() == ISD::SUB &&
1966       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
1967     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
1968 
1969   // fold (A+(B-A)) -> B
1970   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
1971     return N1.getOperand(0);
1972 
1973   // fold ((B-A)+A) -> B
1974   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
1975     return N0.getOperand(0);
1976 
1977   // fold (A+(B-(A+C))) to (B-C)
1978   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1979       N0 == N1.getOperand(1).getOperand(0))
1980     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1981                        N1.getOperand(1).getOperand(1));
1982 
1983   // fold (A+(B-(C+A))) to (B-C)
1984   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1985       N0 == N1.getOperand(1).getOperand(1))
1986     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1987                        N1.getOperand(1).getOperand(0));
1988 
1989   // fold (A+((B-A)+or-C)) to (B+or-C)
1990   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
1991       N1.getOperand(0).getOpcode() == ISD::SUB &&
1992       N0 == N1.getOperand(0).getOperand(1))
1993     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
1994                        N1.getOperand(1));
1995 
1996   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
1997   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
1998     SDValue N00 = N0.getOperand(0);
1999     SDValue N01 = N0.getOperand(1);
2000     SDValue N10 = N1.getOperand(0);
2001     SDValue N11 = N1.getOperand(1);
2002 
2003     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2004       return DAG.getNode(ISD::SUB, DL, VT,
2005                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2006                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2007   }
2008 
2009   if (SimplifyDemandedBits(SDValue(N, 0)))
2010     return SDValue(N, 0);
2011 
2012   // fold (a+b) -> (a|b) iff a and b share no bits.
2013   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2014       DAG.haveNoCommonBitsSet(N0, N1))
2015     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2016 
2017   if (SDValue Combined = visitADDLike(N0, N1, N))
2018     return Combined;
2019 
2020   if (SDValue Combined = visitADDLike(N1, N0, N))
2021     return Combined;
2022 
2023   return SDValue();
2024 }
2025 
2026 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2027   bool Masked = false;
2028 
2029   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2030   while (true) {
2031     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2032       V = V.getOperand(0);
2033       continue;
2034     }
2035 
2036     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2037       Masked = true;
2038       V = V.getOperand(0);
2039       continue;
2040     }
2041 
2042     break;
2043   }
2044 
2045   // If this is not a carry, return.
2046   if (V.getResNo() != 1)
2047     return SDValue();
2048 
2049   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2050       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2051     return SDValue();
2052 
2053   // If the result is masked, then no matter what kind of bool it is we can
2054   // return. If it isn't, then we need to make sure the bool type is either 0 or
2055   // 1 and not other values.
2056   if (Masked ||
2057       TLI.getBooleanContents(V.getValueType()) ==
2058           TargetLoweringBase::ZeroOrOneBooleanContent)
2059     return V;
2060 
2061   return SDValue();
2062 }
2063 
2064 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2065   EVT VT = N0.getValueType();
2066   SDLoc DL(LocReference);
2067 
2068   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2069   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2070       isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
2071     return DAG.getNode(ISD::SUB, DL, VT, N0,
2072                        DAG.getNode(ISD::SHL, DL, VT,
2073                                    N1.getOperand(0).getOperand(1),
2074                                    N1.getOperand(1)));
2075 
2076   if (N1.getOpcode() == ISD::AND) {
2077     SDValue AndOp0 = N1.getOperand(0);
2078     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2079     unsigned DestBits = VT.getScalarSizeInBits();
2080 
2081     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2082     // and similar xforms where the inner op is either ~0 or 0.
2083     if (NumSignBits == DestBits &&
2084         isOneConstantOrOneSplatConstant(N1->getOperand(1)))
2085       return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2086   }
2087 
2088   // add (sext i1), X -> sub X, (zext i1)
2089   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2090       N0.getOperand(0).getValueType() == MVT::i1 &&
2091       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
2092     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2093     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2094   }
2095 
2096   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2097   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2098     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2099     if (TN->getVT() == MVT::i1) {
2100       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2101                                  DAG.getConstant(1, DL, VT));
2102       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2103     }
2104   }
2105 
2106   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2107   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
2108     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2109                        N0, N1.getOperand(0), N1.getOperand(2));
2110 
2111   // (add X, Carry) -> (addcarry X, 0, Carry)
2112   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2113     if (SDValue Carry = getAsCarry(TLI, N1))
2114       return DAG.getNode(ISD::ADDCARRY, DL,
2115                          DAG.getVTList(VT, Carry.getValueType()), N0,
2116                          DAG.getConstant(0, DL, VT), Carry);
2117 
2118   return SDValue();
2119 }
2120 
2121 SDValue DAGCombiner::visitADDC(SDNode *N) {
2122   SDValue N0 = N->getOperand(0);
2123   SDValue N1 = N->getOperand(1);
2124   EVT VT = N0.getValueType();
2125   SDLoc DL(N);
2126 
2127   // If the flag result is dead, turn this into an ADD.
2128   if (!N->hasAnyUseOfValue(1))
2129     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2130                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2131 
2132   // canonicalize constant to RHS.
2133   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2134   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2135   if (N0C && !N1C)
2136     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2137 
2138   // fold (addc x, 0) -> x + no carry out
2139   if (isNullConstant(N1))
2140     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2141                                         DL, MVT::Glue));
2142 
2143   // If it cannot overflow, transform into an add.
2144   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2145     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2146                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2147 
2148   return SDValue();
2149 }
2150 
2151 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2152   SDValue N0 = N->getOperand(0);
2153   SDValue N1 = N->getOperand(1);
2154   EVT VT = N0.getValueType();
2155   if (VT.isVector())
2156     return SDValue();
2157 
2158   EVT CarryVT = N->getValueType(1);
2159   SDLoc DL(N);
2160 
2161   // If the flag result is dead, turn this into an ADD.
2162   if (!N->hasAnyUseOfValue(1))
2163     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2164                      DAG.getUNDEF(CarryVT));
2165 
2166   // canonicalize constant to RHS.
2167   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2168   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2169   if (N0C && !N1C)
2170     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2171 
2172   // fold (uaddo x, 0) -> x + no carry out
2173   if (isNullConstant(N1))
2174     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2175 
2176   // If it cannot overflow, transform into an add.
2177   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2178     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2179                      DAG.getConstant(0, DL, CarryVT));
2180 
2181   if (SDValue Combined = visitUADDOLike(N0, N1, N))
2182     return Combined;
2183 
2184   if (SDValue Combined = visitUADDOLike(N1, N0, N))
2185     return Combined;
2186 
2187   return SDValue();
2188 }
2189 
2190 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2191   auto VT = N0.getValueType();
2192 
2193   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2194   // If Y + 1 cannot overflow.
2195   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2196     SDValue Y = N1.getOperand(0);
2197     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2198     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2199       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2200                          N1.getOperand(2));
2201   }
2202 
2203   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2204   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2205     if (SDValue Carry = getAsCarry(TLI, N1))
2206       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2207                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2208 
2209   return SDValue();
2210 }
2211 
2212 SDValue DAGCombiner::visitADDE(SDNode *N) {
2213   SDValue N0 = N->getOperand(0);
2214   SDValue N1 = N->getOperand(1);
2215   SDValue CarryIn = N->getOperand(2);
2216 
2217   // canonicalize constant to RHS
2218   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2219   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2220   if (N0C && !N1C)
2221     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2222                        N1, N0, CarryIn);
2223 
2224   // fold (adde x, y, false) -> (addc x, y)
2225   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2226     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2227 
2228   return SDValue();
2229 }
2230 
2231 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2232   SDValue N0 = N->getOperand(0);
2233   SDValue N1 = N->getOperand(1);
2234   SDValue CarryIn = N->getOperand(2);
2235   SDLoc DL(N);
2236 
2237   // canonicalize constant to RHS
2238   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2239   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2240   if (N0C && !N1C)
2241     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2242 
2243   // fold (addcarry x, y, false) -> (uaddo x, y)
2244   if (isNullConstant(CarryIn))
2245     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2246 
2247   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2248   if (isNullConstant(N0) && isNullConstant(N1)) {
2249     EVT VT = N0.getValueType();
2250     EVT CarryVT = CarryIn.getValueType();
2251     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2252     AddToWorklist(CarryExt.getNode());
2253     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2254                                     DAG.getConstant(1, DL, VT)),
2255                      DAG.getConstant(0, DL, CarryVT));
2256   }
2257 
2258   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2259     return Combined;
2260 
2261   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2262     return Combined;
2263 
2264   return SDValue();
2265 }
2266 
2267 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2268                                        SDNode *N) {
2269   // Iff the flag result is dead:
2270   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2271   if ((N0.getOpcode() == ISD::ADD ||
2272        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2273       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2274     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2275                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2276 
2277   /**
2278    * When one of the addcarry argument is itself a carry, we may be facing
2279    * a diamond carry propagation. In which case we try to transform the DAG
2280    * to ensure linear carry propagation if that is possible.
2281    *
2282    * We are trying to get:
2283    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2284    */
2285   if (auto Y = getAsCarry(TLI, N1)) {
2286     /**
2287      *            (uaddo A, B)
2288      *             /       \
2289      *          Carry      Sum
2290      *            |          \
2291      *            | (addcarry *, 0, Z)
2292      *            |       /
2293      *             \   Carry
2294      *              |   /
2295      * (addcarry X, *, *)
2296      */
2297     if (Y.getOpcode() == ISD::UADDO &&
2298         CarryIn.getResNo() == 1 &&
2299         CarryIn.getOpcode() == ISD::ADDCARRY &&
2300         isNullConstant(CarryIn.getOperand(1)) &&
2301         CarryIn.getOperand(0) == Y.getValue(0)) {
2302       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2303                               Y.getOperand(0), Y.getOperand(1),
2304                               CarryIn.getOperand(2));
2305       AddToWorklist(NewY.getNode());
2306       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2307                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2308                          NewY.getValue(1));
2309     }
2310   }
2311 
2312   return SDValue();
2313 }
2314 
2315 // Since it may not be valid to emit a fold to zero for vector initializers
2316 // check if we can before folding.
2317 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2318                              SelectionDAG &DAG, bool LegalOperations,
2319                              bool LegalTypes) {
2320   if (!VT.isVector())
2321     return DAG.getConstant(0, DL, VT);
2322   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2323     return DAG.getConstant(0, DL, VT);
2324   return SDValue();
2325 }
2326 
2327 SDValue DAGCombiner::visitSUB(SDNode *N) {
2328   SDValue N0 = N->getOperand(0);
2329   SDValue N1 = N->getOperand(1);
2330   EVT VT = N0.getValueType();
2331   SDLoc DL(N);
2332 
2333   // fold vector ops
2334   if (VT.isVector()) {
2335     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2336       return FoldedVOp;
2337 
2338     // fold (sub x, 0) -> x, vector edition
2339     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2340       return N0;
2341   }
2342 
2343   // fold (sub x, x) -> 0
2344   // FIXME: Refactor this and xor and other similar operations together.
2345   if (N0 == N1)
2346     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
2347   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2348       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2349     // fold (sub c1, c2) -> c1-c2
2350     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2351                                       N1.getNode());
2352   }
2353 
2354   if (SDValue NewSel = foldBinOpIntoSelect(N))
2355     return NewSel;
2356 
2357   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2358 
2359   // fold (sub x, c) -> (add x, -c)
2360   if (N1C) {
2361     return DAG.getNode(ISD::ADD, DL, VT, N0,
2362                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2363   }
2364 
2365   if (isNullConstantOrNullSplatConstant(N0)) {
2366     unsigned BitWidth = VT.getScalarSizeInBits();
2367     // Right-shifting everything out but the sign bit followed by negation is
2368     // the same as flipping arithmetic/logical shift type without the negation:
2369     // -(X >>u 31) -> (X >>s 31)
2370     // -(X >>s 31) -> (X >>u 31)
2371     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2372       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2373       if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2374         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2375         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2376           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2377       }
2378     }
2379 
2380     // 0 - X --> 0 if the sub is NUW.
2381     if (N->getFlags().hasNoUnsignedWrap())
2382       return N0;
2383 
2384     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2385       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2386       // N1 must be 0 because negating the minimum signed value is undefined.
2387       if (N->getFlags().hasNoSignedWrap())
2388         return N0;
2389 
2390       // 0 - X --> X if X is 0 or the minimum signed value.
2391       return N1;
2392     }
2393   }
2394 
2395   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2396   if (isAllOnesConstantOrAllOnesSplatConstant(N0))
2397     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2398 
2399   // fold A-(A-B) -> B
2400   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2401     return N1.getOperand(1);
2402 
2403   // fold (A+B)-A -> B
2404   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2405     return N0.getOperand(1);
2406 
2407   // fold (A+B)-B -> A
2408   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2409     return N0.getOperand(0);
2410 
2411   // fold C2-(A+C1) -> (C2-C1)-A
2412   if (N1.getOpcode() == ISD::ADD) {
2413     SDValue N11 = N1.getOperand(1);
2414     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2415         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2416       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2417       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2418     }
2419   }
2420 
2421   // fold ((A+(B+or-C))-B) -> A+or-C
2422   if (N0.getOpcode() == ISD::ADD &&
2423       (N0.getOperand(1).getOpcode() == ISD::SUB ||
2424        N0.getOperand(1).getOpcode() == ISD::ADD) &&
2425       N0.getOperand(1).getOperand(0) == N1)
2426     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2427                        N0.getOperand(1).getOperand(1));
2428 
2429   // fold ((A+(C+B))-B) -> A+C
2430   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2431       N0.getOperand(1).getOperand(1) == N1)
2432     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2433                        N0.getOperand(1).getOperand(0));
2434 
2435   // fold ((A-(B-C))-C) -> A-B
2436   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2437       N0.getOperand(1).getOperand(1) == N1)
2438     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2439                        N0.getOperand(1).getOperand(0));
2440 
2441   // If either operand of a sub is undef, the result is undef
2442   if (N0.isUndef())
2443     return N0;
2444   if (N1.isUndef())
2445     return N1;
2446 
2447   // If the relocation model supports it, consider symbol offsets.
2448   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2449     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2450       // fold (sub Sym, c) -> Sym-c
2451       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2452         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2453                                     GA->getOffset() -
2454                                         (uint64_t)N1C->getSExtValue());
2455       // fold (sub Sym+c1, Sym+c2) -> c1-c2
2456       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2457         if (GA->getGlobal() == GB->getGlobal())
2458           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2459                                  DL, VT);
2460     }
2461 
2462   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2463   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2464     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2465     if (TN->getVT() == MVT::i1) {
2466       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2467                                  DAG.getConstant(1, DL, VT));
2468       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2469     }
2470   }
2471 
2472   return SDValue();
2473 }
2474 
2475 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2476   SDValue N0 = N->getOperand(0);
2477   SDValue N1 = N->getOperand(1);
2478   EVT VT = N0.getValueType();
2479   SDLoc DL(N);
2480 
2481   // If the flag result is dead, turn this into an SUB.
2482   if (!N->hasAnyUseOfValue(1))
2483     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2484                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2485 
2486   // fold (subc x, x) -> 0 + no borrow
2487   if (N0 == N1)
2488     return CombineTo(N, DAG.getConstant(0, DL, VT),
2489                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2490 
2491   // fold (subc x, 0) -> x + no borrow
2492   if (isNullConstant(N1))
2493     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2494 
2495   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2496   if (isAllOnesConstant(N0))
2497     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2498                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2499 
2500   return SDValue();
2501 }
2502 
2503 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2504   SDValue N0 = N->getOperand(0);
2505   SDValue N1 = N->getOperand(1);
2506   EVT VT = N0.getValueType();
2507   if (VT.isVector())
2508     return SDValue();
2509 
2510   EVT CarryVT = N->getValueType(1);
2511   SDLoc DL(N);
2512 
2513   // If the flag result is dead, turn this into an SUB.
2514   if (!N->hasAnyUseOfValue(1))
2515     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2516                      DAG.getUNDEF(CarryVT));
2517 
2518   // fold (usubo x, x) -> 0 + no borrow
2519   if (N0 == N1)
2520     return CombineTo(N, DAG.getConstant(0, DL, VT),
2521                      DAG.getConstant(0, DL, CarryVT));
2522 
2523   // fold (usubo x, 0) -> x + no borrow
2524   if (isNullConstant(N1))
2525     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2526 
2527   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2528   if (isAllOnesConstant(N0))
2529     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2530                      DAG.getConstant(0, DL, CarryVT));
2531 
2532   return SDValue();
2533 }
2534 
2535 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2536   SDValue N0 = N->getOperand(0);
2537   SDValue N1 = N->getOperand(1);
2538   SDValue CarryIn = N->getOperand(2);
2539 
2540   // fold (sube x, y, false) -> (subc x, y)
2541   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2542     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2543 
2544   return SDValue();
2545 }
2546 
2547 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2548   SDValue N0 = N->getOperand(0);
2549   SDValue N1 = N->getOperand(1);
2550   SDValue CarryIn = N->getOperand(2);
2551 
2552   // fold (subcarry x, y, false) -> (usubo x, y)
2553   if (isNullConstant(CarryIn))
2554     return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2555 
2556   return SDValue();
2557 }
2558 
2559 SDValue DAGCombiner::visitMUL(SDNode *N) {
2560   SDValue N0 = N->getOperand(0);
2561   SDValue N1 = N->getOperand(1);
2562   EVT VT = N0.getValueType();
2563 
2564   // fold (mul x, undef) -> 0
2565   if (N0.isUndef() || N1.isUndef())
2566     return DAG.getConstant(0, SDLoc(N), VT);
2567 
2568   bool N0IsConst = false;
2569   bool N1IsConst = false;
2570   bool N1IsOpaqueConst = false;
2571   bool N0IsOpaqueConst = false;
2572   APInt ConstValue0, ConstValue1;
2573   // fold vector ops
2574   if (VT.isVector()) {
2575     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2576       return FoldedVOp;
2577 
2578     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2579     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2580   } else {
2581     N0IsConst = isa<ConstantSDNode>(N0);
2582     if (N0IsConst) {
2583       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2584       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2585     }
2586     N1IsConst = isa<ConstantSDNode>(N1);
2587     if (N1IsConst) {
2588       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2589       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2590     }
2591   }
2592 
2593   // fold (mul c1, c2) -> c1*c2
2594   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2595     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2596                                       N0.getNode(), N1.getNode());
2597 
2598   // canonicalize constant to RHS (vector doesn't have to splat)
2599   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2600      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2601     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2602   // fold (mul x, 0) -> 0
2603   if (N1IsConst && ConstValue1.isNullValue())
2604     return N1;
2605   // We require a splat of the entire scalar bit width for non-contiguous
2606   // bit patterns.
2607   bool IsFullSplat =
2608     ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
2609   // fold (mul x, 1) -> x
2610   if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat)
2611     return N0;
2612 
2613   if (SDValue NewSel = foldBinOpIntoSelect(N))
2614     return NewSel;
2615 
2616   // fold (mul x, -1) -> 0-x
2617   if (N1IsConst && ConstValue1.isAllOnesValue()) {
2618     SDLoc DL(N);
2619     return DAG.getNode(ISD::SUB, DL, VT,
2620                        DAG.getConstant(0, DL, VT), N0);
2621   }
2622   // fold (mul x, (1 << c)) -> x << c
2623   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
2624       IsFullSplat) {
2625     SDLoc DL(N);
2626     return DAG.getNode(ISD::SHL, DL, VT, N0,
2627                        DAG.getConstant(ConstValue1.logBase2(), DL,
2628                                        getShiftAmountTy(N0.getValueType())));
2629   }
2630   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2631   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
2632       IsFullSplat) {
2633     unsigned Log2Val = (-ConstValue1).logBase2();
2634     SDLoc DL(N);
2635     // FIXME: If the input is something that is easily negated (e.g. a
2636     // single-use add), we should put the negate there.
2637     return DAG.getNode(ISD::SUB, DL, VT,
2638                        DAG.getConstant(0, DL, VT),
2639                        DAG.getNode(ISD::SHL, DL, VT, N0,
2640                             DAG.getConstant(Log2Val, DL,
2641                                       getShiftAmountTy(N0.getValueType()))));
2642   }
2643 
2644   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2645   if (N0.getOpcode() == ISD::SHL &&
2646       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2647       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2648     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2649     if (isConstantOrConstantVector(C3))
2650       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2651   }
2652 
2653   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2654   // use.
2655   {
2656     SDValue Sh(nullptr, 0), Y(nullptr, 0);
2657 
2658     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
2659     if (N0.getOpcode() == ISD::SHL &&
2660         isConstantOrConstantVector(N0.getOperand(1)) &&
2661         N0.getNode()->hasOneUse()) {
2662       Sh = N0; Y = N1;
2663     } else if (N1.getOpcode() == ISD::SHL &&
2664                isConstantOrConstantVector(N1.getOperand(1)) &&
2665                N1.getNode()->hasOneUse()) {
2666       Sh = N1; Y = N0;
2667     }
2668 
2669     if (Sh.getNode()) {
2670       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2671       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2672     }
2673   }
2674 
2675   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2676   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2677       N0.getOpcode() == ISD::ADD &&
2678       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
2679       isMulAddWithConstProfitable(N, N0, N1))
2680       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2681                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2682                                      N0.getOperand(0), N1),
2683                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2684                                      N0.getOperand(1), N1));
2685 
2686   // reassociate mul
2687   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2688     return RMUL;
2689 
2690   return SDValue();
2691 }
2692 
2693 /// Return true if divmod libcall is available.
2694 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2695                                      const TargetLowering &TLI) {
2696   RTLIB::Libcall LC;
2697   EVT NodeType = Node->getValueType(0);
2698   if (!NodeType.isSimple())
2699     return false;
2700   switch (NodeType.getSimpleVT().SimpleTy) {
2701   default: return false; // No libcall for vector types.
2702   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2703   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2704   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2705   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2706   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2707   }
2708 
2709   return TLI.getLibcallName(LC) != nullptr;
2710 }
2711 
/// Issue divrem if both quotient and remainder are needed.
///
/// \p Node is a div or rem node (SDIV/UDIV/SREM/UREM). Other users of its
/// first operand that compute a div, rem or divrem of the same two operands
/// are rewritten to use the two results of a single SDIVREM/UDIVREM node.
/// Returns that DIVREM value (result 0), or an empty SDValue if the
/// combination was not performed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  // Signedness is derived from the opcode and selects the DIVREM flavour.
  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  // Only scalar integer types are handled.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // An illegal type is only accepted when the target custom-lowers DIVREM.
  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  // OtherOpcode is the counterpart of Opcode: the rem matching a div, or the
  // div matching a rem. In both branches it is the div opcode whose
  // legality is tested.
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  // Walk every user of the dividend looking for div/rem/divrem nodes on the
  // same operand pair.
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE;) {
    // The iterator is advanced before the body runs; presumably so that
    // replacing User via CombineTo below cannot invalidate it.
    SDNode *User = *UI++;
    if (User == Node || User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          // Found the counterpart operation: create the shared DIVREM node.
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          // An existing DIVREM on the same operands is reused as is.
          combined = SDValue(User, 0);
        } else {
          // Another node with Node's own opcode; until a DIVREM exists there
          // is nothing to replace it with.
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Divisions take result 0 of the DIVREM, remainders take result 1.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
2781 
2782 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
2783   SDValue N0 = N->getOperand(0);
2784   SDValue N1 = N->getOperand(1);
2785   EVT VT = N->getValueType(0);
2786   SDLoc DL(N);
2787 
2788   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2789     return DAG.getUNDEF(VT);
2790 
2791   // undef / X -> 0
2792   // undef % X -> 0
2793   if (N0.isUndef())
2794     return DAG.getConstant(0, DL, VT);
2795 
2796   return SDValue();
2797 }
2798 
2799 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2800   SDValue N0 = N->getOperand(0);
2801   SDValue N1 = N->getOperand(1);
2802   EVT VT = N->getValueType(0);
2803 
2804   // fold vector ops
2805   if (VT.isVector())
2806     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2807       return FoldedVOp;
2808 
2809   SDLoc DL(N);
2810 
2811   // fold (sdiv c1, c2) -> c1/c2
2812   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2813   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2814   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2815     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2816   // fold (sdiv X, 1) -> X
2817   if (N1C && N1C->isOne())
2818     return N0;
2819   // fold (sdiv X, -1) -> 0-X
2820   if (N1C && N1C->isAllOnesValue())
2821     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
2822 
2823   if (SDValue V = simplifyDivRem(N, DAG))
2824     return V;
2825 
2826   if (SDValue NewSel = foldBinOpIntoSelect(N))
2827     return NewSel;
2828 
2829   // If we know the sign bits of both operands are zero, strength reduce to a
2830   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
2831   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2832     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2833 
2834   // fold (sdiv X, pow2) -> simple ops after legalize
2835   // FIXME: We check for the exact bit here because the generic lowering gives
2836   // better results in that case. The target-specific lowering should learn how
2837   // to handle exact sdivs efficiently.
2838   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2839       !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
2840                                     (-N1C->getAPIntValue()).isPowerOf2())) {
2841     // Target-specific implementation of sdiv x, pow2.
2842     if (SDValue Res = BuildSDIVPow2(N))
2843       return Res;
2844 
2845     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2846 
2847     // Splat the sign bit into the register
2848     SDValue SGN =
2849         DAG.getNode(ISD::SRA, DL, VT, N0,
2850                     DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2851                                     getShiftAmountTy(N0.getValueType())));
2852     AddToWorklist(SGN.getNode());
2853 
2854     // Add (N0 < 0) ? abs2 - 1 : 0;
2855     SDValue SRL =
2856         DAG.getNode(ISD::SRL, DL, VT, SGN,
2857                     DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2858                                     getShiftAmountTy(SGN.getValueType())));
2859     SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2860     AddToWorklist(SRL.getNode());
2861     AddToWorklist(ADD.getNode());    // Divide by pow2
2862     SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2863                   DAG.getConstant(lg2, DL,
2864                                   getShiftAmountTy(ADD.getValueType())));
2865 
2866     // If we're dividing by a positive value, we're done.  Otherwise, we must
2867     // negate the result.
2868     if (N1C->getAPIntValue().isNonNegative())
2869       return SRA;
2870 
2871     AddToWorklist(SRA.getNode());
2872     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2873   }
2874 
2875   // If integer divide is expensive and we satisfy the requirements, emit an
2876   // alternate sequence.  Targets may check function attributes for size/speed
2877   // trade-offs.
2878   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2879   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2880     if (SDValue Op = BuildSDIV(N))
2881       return Op;
2882 
2883   // sdiv, srem -> sdivrem
2884   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2885   // true.  Otherwise, we break the simplification logic in visitREM().
2886   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2887     if (SDValue DivRem = useDivRem(N))
2888         return DivRem;
2889 
2890   return SDValue();
2891 }
2892 
2893 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2894   SDValue N0 = N->getOperand(0);
2895   SDValue N1 = N->getOperand(1);
2896   EVT VT = N->getValueType(0);
2897 
2898   // fold vector ops
2899   if (VT.isVector())
2900     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2901       return FoldedVOp;
2902 
2903   SDLoc DL(N);
2904 
2905   // fold (udiv c1, c2) -> c1/c2
2906   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2907   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2908   if (N0C && N1C)
2909     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
2910                                                     N0C, N1C))
2911       return Folded;
2912 
2913   if (SDValue V = simplifyDivRem(N, DAG))
2914     return V;
2915 
2916   if (SDValue NewSel = foldBinOpIntoSelect(N))
2917     return NewSel;
2918 
2919   // fold (udiv x, (1 << c)) -> x >>u c
2920   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2921       DAG.isKnownToBeAPowerOfTwo(N1)) {
2922     SDValue LogBase2 = BuildLogBase2(N1, DL);
2923     AddToWorklist(LogBase2.getNode());
2924 
2925     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2926     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2927     AddToWorklist(Trunc.getNode());
2928     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
2929   }
2930 
2931   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2932   if (N1.getOpcode() == ISD::SHL) {
2933     SDValue N10 = N1.getOperand(0);
2934     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
2935         DAG.isKnownToBeAPowerOfTwo(N10)) {
2936       SDValue LogBase2 = BuildLogBase2(N10, DL);
2937       AddToWorklist(LogBase2.getNode());
2938 
2939       EVT ADDVT = N1.getOperand(1).getValueType();
2940       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
2941       AddToWorklist(Trunc.getNode());
2942       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
2943       AddToWorklist(Add.getNode());
2944       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
2945     }
2946   }
2947 
2948   // fold (udiv x, c) -> alternate
2949   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2950   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2951     if (SDValue Op = BuildUDIV(N))
2952       return Op;
2953 
2954   // sdiv, srem -> sdivrem
2955   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2956   // true.  Otherwise, we break the simplification logic in visitREM().
2957   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2958     if (SDValue DivRem = useDivRem(N))
2959         return DivRem;
2960 
2961   return SDValue();
2962 }
2963 
2964 // handles ISD::SREM and ISD::UREM
2965 SDValue DAGCombiner::visitREM(SDNode *N) {
2966   unsigned Opcode = N->getOpcode();
2967   SDValue N0 = N->getOperand(0);
2968   SDValue N1 = N->getOperand(1);
2969   EVT VT = N->getValueType(0);
2970   bool isSigned = (Opcode == ISD::SREM);
2971   SDLoc DL(N);
2972 
2973   // fold (rem c1, c2) -> c1%c2
2974   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2975   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2976   if (N0C && N1C)
2977     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
2978       return Folded;
2979 
2980   if (SDValue V = simplifyDivRem(N, DAG))
2981     return V;
2982 
2983   if (SDValue NewSel = foldBinOpIntoSelect(N))
2984     return NewSel;
2985 
2986   if (isSigned) {
2987     // If we know the sign bits of both operands are zero, strength reduce to a
2988     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
2989     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2990       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
2991   } else {
2992     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
2993     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
2994       // fold (urem x, pow2) -> (and x, pow2-1)
2995       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
2996       AddToWorklist(Add.getNode());
2997       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2998     }
2999     if (N1.getOpcode() == ISD::SHL &&
3000         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3001       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3002       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3003       AddToWorklist(Add.getNode());
3004       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3005     }
3006   }
3007 
3008   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
3009 
3010   // If X/C can be simplified by the division-by-constant logic, lower
3011   // X%C to the equivalent of X-X/C*C.
3012   // To avoid mangling nodes, this simplification requires that the combine()
3013   // call for the speculative DIV must not cause a DIVREM conversion.  We guard
3014   // against this by skipping the simplification if isIntDivCheap().  When
3015   // div is not cheap, combine will not return a DIVREM.  Regardless,
3016   // checking cheapness here makes sense since the simplification results in
3017   // fatter code.
3018   if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
3019     unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3020     SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
3021     AddToWorklist(Div.getNode());
3022     SDValue OptimizedDiv = combine(Div.getNode());
3023     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
3024       assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
3025              (OptimizedDiv.getOpcode() != ISD::SDIVREM));
3026       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3027       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3028       AddToWorklist(Mul.getNode());
3029       return Sub;
3030     }
3031   }
3032 
3033   // sdiv, srem -> sdivrem
3034   if (SDValue DivRem = useDivRem(N))
3035     return DivRem.getValue(1);
3036 
3037   return SDValue();
3038 }
3039 
3040 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3041   SDValue N0 = N->getOperand(0);
3042   SDValue N1 = N->getOperand(1);
3043   EVT VT = N->getValueType(0);
3044   SDLoc DL(N);
3045 
3046   // fold (mulhs x, 0) -> 0
3047   if (isNullConstant(N1))
3048     return N1;
3049   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3050   if (isOneConstant(N1)) {
3051     SDLoc DL(N);
3052     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3053                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3054                                        getShiftAmountTy(N0.getValueType())));
3055   }
3056   // fold (mulhs x, undef) -> 0
3057   if (N0.isUndef() || N1.isUndef())
3058     return DAG.getConstant(0, SDLoc(N), VT);
3059 
3060   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3061   // plus a shift.
3062   if (VT.isSimple() && !VT.isVector()) {
3063     MVT Simple = VT.getSimpleVT();
3064     unsigned SimpleSize = Simple.getSizeInBits();
3065     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3066     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3067       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3068       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3069       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3070       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3071             DAG.getConstant(SimpleSize, DL,
3072                             getShiftAmountTy(N1.getValueType())));
3073       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3074     }
3075   }
3076 
3077   return SDValue();
3078 }
3079 
3080 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3081   SDValue N0 = N->getOperand(0);
3082   SDValue N1 = N->getOperand(1);
3083   EVT VT = N->getValueType(0);
3084   SDLoc DL(N);
3085 
3086   // fold (mulhu x, 0) -> 0
3087   if (isNullConstant(N1))
3088     return N1;
3089   // fold (mulhu x, 1) -> 0
3090   if (isOneConstant(N1))
3091     return DAG.getConstant(0, DL, N0.getValueType());
3092   // fold (mulhu x, undef) -> 0
3093   if (N0.isUndef() || N1.isUndef())
3094     return DAG.getConstant(0, DL, VT);
3095 
3096   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3097   // plus a shift.
3098   if (VT.isSimple() && !VT.isVector()) {
3099     MVT Simple = VT.getSimpleVT();
3100     unsigned SimpleSize = Simple.getSizeInBits();
3101     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3102     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3103       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3104       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3105       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3106       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3107             DAG.getConstant(SimpleSize, DL,
3108                             getShiftAmountTy(N1.getValueType())));
3109       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3110     }
3111   }
3112 
3113   return SDValue();
3114 }
3115 
3116 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3117 /// give the opcodes for the two computations that are being performed. Return
3118 /// true if a simplification was made.
3119 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3120                                                 unsigned HiOp) {
3121   // If the high half is not needed, just compute the low half.
3122   bool HiExists = N->hasAnyUseOfValue(1);
3123   if (!HiExists &&
3124       (!LegalOperations ||
3125        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3126     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3127     return CombineTo(N, Res, Res);
3128   }
3129 
3130   // If the low half is not needed, just compute the high half.
3131   bool LoExists = N->hasAnyUseOfValue(0);
3132   if (!LoExists &&
3133       (!LegalOperations ||
3134        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
3135     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3136     return CombineTo(N, Res, Res);
3137   }
3138 
3139   // If both halves are used, return as it is.
3140   if (LoExists && HiExists)
3141     return SDValue();
3142 
3143   // If the two computed results can be simplified separately, separate them.
3144   if (LoExists) {
3145     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3146     AddToWorklist(Lo.getNode());
3147     SDValue LoOpt = combine(Lo.getNode());
3148     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3149         (!LegalOperations ||
3150          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
3151       return CombineTo(N, LoOpt, LoOpt);
3152   }
3153 
3154   if (HiExists) {
3155     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3156     AddToWorklist(Hi.getNode());
3157     SDValue HiOpt = combine(Hi.getNode());
3158     if (HiOpt.getNode() && HiOpt != Hi &&
3159         (!LegalOperations ||
3160          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
3161       return CombineTo(N, HiOpt, HiOpt);
3162   }
3163 
3164   return SDValue();
3165 }
3166 
3167 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3168   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3169     return Res;
3170 
3171   EVT VT = N->getValueType(0);
3172   SDLoc DL(N);
3173 
3174   // If the type is twice as wide is legal, transform the mulhu to a wider
3175   // multiply plus a shift.
3176   if (VT.isSimple() && !VT.isVector()) {
3177     MVT Simple = VT.getSimpleVT();
3178     unsigned SimpleSize = Simple.getSizeInBits();
3179     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3180     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3181       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3182       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3183       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3184       // Compute the high part as N1.
3185       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3186             DAG.getConstant(SimpleSize, DL,
3187                             getShiftAmountTy(Lo.getValueType())));
3188       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3189       // Compute the low part as N0.
3190       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3191       return CombineTo(N, Lo, Hi);
3192     }
3193   }
3194 
3195   return SDValue();
3196 }
3197 
3198 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3199   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3200     return Res;
3201 
3202   EVT VT = N->getValueType(0);
3203   SDLoc DL(N);
3204 
3205   // If the type is twice as wide is legal, transform the mulhu to a wider
3206   // multiply plus a shift.
3207   if (VT.isSimple() && !VT.isVector()) {
3208     MVT Simple = VT.getSimpleVT();
3209     unsigned SimpleSize = Simple.getSizeInBits();
3210     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3211     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3212       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3213       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3214       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3215       // Compute the high part as N1.
3216       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3217             DAG.getConstant(SimpleSize, DL,
3218                             getShiftAmountTy(Lo.getValueType())));
3219       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3220       // Compute the low part as N0.
3221       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3222       return CombineTo(N, Lo, Hi);
3223     }
3224   }
3225 
3226   return SDValue();
3227 }
3228 
3229 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3230   // (smulo x, 2) -> (saddo x, x)
3231   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3232     if (C2->getAPIntValue() == 2)
3233       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3234                          N->getOperand(0), N->getOperand(0));
3235 
3236   return SDValue();
3237 }
3238 
3239 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3240   // (umulo x, 2) -> (uaddo x, x)
3241   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3242     if (C2->getAPIntValue() == 2)
3243       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3244                          N->getOperand(0), N->getOperand(0));
3245 
3246   return SDValue();
3247 }
3248 
3249 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3250   SDValue N0 = N->getOperand(0);
3251   SDValue N1 = N->getOperand(1);
3252   EVT VT = N0.getValueType();
3253 
3254   // fold vector ops
3255   if (VT.isVector())
3256     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3257       return FoldedVOp;
3258 
3259   // fold operation with constant operands.
3260   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3261   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3262   if (N0C && N1C)
3263     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3264 
3265   // canonicalize constant to RHS
3266   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3267      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3268     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3269 
3270   return SDValue();
3271 }
3272 
3273 /// If this is a binary operator with two operands of the same opcode, try to
3274 /// simplify it.
3275 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3276   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3277   EVT VT = N0.getValueType();
3278   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3279 
3280   // Bail early if none of these transforms apply.
3281   if (N0.getNumOperands() == 0) return SDValue();
3282 
3283   // For each of OP in AND/OR/XOR:
3284   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3285   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3286   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3287   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3288   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3289   //
3290   // do not sink logical op inside of a vector extend, since it may combine
3291   // into a vsetcc.
3292   EVT Op0VT = N0.getOperand(0).getValueType();
3293   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3294        N0.getOpcode() == ISD::SIGN_EXTEND ||
3295        N0.getOpcode() == ISD::BSWAP ||
3296        // Avoid infinite looping with PromoteIntBinOp.
3297        (N0.getOpcode() == ISD::ANY_EXTEND &&
3298         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3299        (N0.getOpcode() == ISD::TRUNCATE &&
3300         (!TLI.isZExtFree(VT, Op0VT) ||
3301          !TLI.isTruncateFree(Op0VT, VT)) &&
3302         TLI.isTypeLegal(Op0VT))) &&
3303       !VT.isVector() &&
3304       Op0VT == N1.getOperand(0).getValueType() &&
3305       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3306     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3307                                  N0.getOperand(0).getValueType(),
3308                                  N0.getOperand(0), N1.getOperand(0));
3309     AddToWorklist(ORNode.getNode());
3310     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3311   }
3312 
3313   // For each of OP in SHL/SRL/SRA/AND...
3314   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3315   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
3316   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3317   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3318        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3319       N0.getOperand(1) == N1.getOperand(1)) {
3320     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3321                                  N0.getOperand(0).getValueType(),
3322                                  N0.getOperand(0), N1.getOperand(0));
3323     AddToWorklist(ORNode.getNode());
3324     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3325                        ORNode, N0.getOperand(1));
3326   }
3327 
3328   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3329   // Only perform this optimization up until type legalization, before
3330   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3331   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3332   // we don't want to undo this promotion.
3333   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3334   // on scalars.
3335   if ((N0.getOpcode() == ISD::BITCAST ||
3336        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3337        Level <= AfterLegalizeTypes) {
3338     SDValue In0 = N0.getOperand(0);
3339     SDValue In1 = N1.getOperand(0);
3340     EVT In0Ty = In0.getValueType();
3341     EVT In1Ty = In1.getValueType();
3342     SDLoc DL(N);
3343     // If both incoming values are integers, and the original types are the
3344     // same.
3345     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3346       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3347       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3348       AddToWorklist(Op.getNode());
3349       return BC;
3350     }
3351   }
3352 
3353   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3354   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3355   // If both shuffles use the same mask, and both shuffle within a single
3356   // vector, then it is worthwhile to move the swizzle after the operation.
3357   // The type-legalizer generates this pattern when loading illegal
3358   // vector types from memory. In many cases this allows additional shuffle
3359   // optimizations.
3360   // There are other cases where moving the shuffle after the xor/and/or
3361   // is profitable even if shuffles don't perform a swizzle.
3362   // If both shuffles use the same mask, and both shuffles have the same first
3363   // or second operand, then it might still be profitable to move the shuffle
3364   // after the xor/and/or operation.
3365   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3366     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3367     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3368 
3369     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3370            "Inputs to shuffles are not the same type");
3371 
3372     // Check that both shuffles use the same mask. The masks are known to be of
3373     // the same length because the result vector type is the same.
3374     // Check also that shuffles have only one use to avoid introducing extra
3375     // instructions.
3376     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3377         SVN0->getMask().equals(SVN1->getMask())) {
3378       SDValue ShOp = N0->getOperand(1);
3379 
3380       // Don't try to fold this node if it requires introducing a
3381       // build vector of all zeros that might be illegal at this stage.
3382       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3383         if (!LegalTypes)
3384           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3385         else
3386           ShOp = SDValue();
3387       }
3388 
3389       // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
3390       // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
3391       // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
3392       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3393         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3394                                       N0->getOperand(0), N1->getOperand(0));
3395         AddToWorklist(NewNode.getNode());
3396         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3397                                     SVN0->getMask());
3398       }
3399 
3400       // Don't try to fold this node if it requires introducing a
3401       // build vector of all zeros that might be illegal at this stage.
3402       ShOp = N0->getOperand(0);
3403       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3404         if (!LegalTypes)
3405           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3406         else
3407           ShOp = SDValue();
3408       }
3409 
3410       // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
3411       // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
3412       // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
3413       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3414         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3415                                       N0->getOperand(1), N1->getOperand(1));
3416         AddToWorklist(NewNode.getNode());
3417         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3418                                     SVN0->getMask());
3419       }
3420     }
3421   }
3422 
3423   return SDValue();
3424 }
3425 
3426 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3427 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3428                                        const SDLoc &DL) {
3429   SDValue LL, LR, RL, RR, N0CC, N1CC;
3430   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3431       !isSetCCEquivalent(N1, RL, RR, N1CC))
3432     return SDValue();
3433 
3434   assert(N0.getValueType() == N1.getValueType() &&
3435          "Unexpected operand types for bitwise logic op");
3436   assert(LL.getValueType() == LR.getValueType() &&
3437          RL.getValueType() == RR.getValueType() &&
3438          "Unexpected operand types for setcc");
3439 
3440   // If we're here post-legalization or the logic op type is not i1, the logic
3441   // op type must match a setcc result type. Also, all folds require new
3442   // operations on the left and right operands, so those types must match.
3443   EVT VT = N0.getValueType();
3444   EVT OpVT = LL.getValueType();
3445   if (LegalOperations || VT != MVT::i1)
3446     if (VT != getSetCCResultType(OpVT))
3447       return SDValue();
3448   if (OpVT != RL.getValueType())
3449     return SDValue();
3450 
3451   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3452   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3453   bool IsInteger = OpVT.isInteger();
3454   if (LR == RR && CC0 == CC1 && IsInteger) {
3455     bool IsZero = isNullConstantOrNullSplatConstant(LR);
3456     bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
3457 
3458     // All bits clear?
3459     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3460     // All sign bits clear?
3461     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3462     // Any bits set?
3463     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3464     // Any sign bits set?
3465     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3466 
3467     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
3468     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3469     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
3470     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
3471     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3472       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3473       AddToWorklist(Or.getNode());
3474       return DAG.getSetCC(DL, VT, Or, LR, CC1);
3475     }
3476 
3477     // All bits set?
3478     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3479     // All sign bits set?
3480     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3481     // Any bits clear?
3482     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3483     // Any sign bits clear?
3484     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3485 
3486     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3487     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
3488     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3489     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
3490     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3491       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3492       AddToWorklist(And.getNode());
3493       return DAG.getSetCC(DL, VT, And, LR, CC1);
3494     }
3495   }
3496 
3497   // TODO: What is the 'or' equivalent of this fold?
3498   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
3499   if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
3500       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3501        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3502     SDValue One = DAG.getConstant(1, DL, OpVT);
3503     SDValue Two = DAG.getConstant(2, DL, OpVT);
3504     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3505     AddToWorklist(Add.getNode());
3506     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3507   }
3508 
3509   // Try more general transforms if the predicates match and the only user of
3510   // the compares is the 'and' or 'or'.
3511   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
3512       N0.hasOneUse() && N1.hasOneUse()) {
3513     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
3514     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
3515     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
3516       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
3517       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
3518       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
3519       SDValue Zero = DAG.getConstant(0, DL, OpVT);
3520       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
3521     }
3522   }
3523 
3524   // Canonicalize equivalent operands to LL == RL.
3525   if (LL == RR && LR == RL) {
3526     CC1 = ISD::getSetCCSwappedOperands(CC1);
3527     std::swap(RL, RR);
3528   }
3529 
3530   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3531   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3532   if (LL == RL && LR == RR) {
3533     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3534                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
3535     if (NewCC != ISD::SETCC_INVALID &&
3536         (!LegalOperations ||
3537          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3538           TLI.isOperationLegal(ISD::SETCC, OpVT))))
3539       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3540   }
3541 
3542   return SDValue();
3543 }
3544 
3545 /// This contains all DAGCombine rules which reduce two values combined by
3546 /// an And operation to a single value. This makes them reusable in the context
3547 /// of visitSELECT(). Rules involving constants are not included as
3548 /// visitSELECT() already handles those cases.
3549 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3550   EVT VT = N1.getValueType();
3551   SDLoc DL(N);
3552 
3553   // fold (and x, undef) -> 0
3554   if (N0.isUndef() || N1.isUndef())
3555     return DAG.getConstant(0, DL, VT);
3556 
3557   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3558     return V;
3559 
3560   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3561       VT.getSizeInBits() <= 64) {
3562     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3563       APInt ADDC = ADDI->getAPIntValue();
3564       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3565         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3566         // immediate for an add, but it is legal if its top c2 bits are set,
3567         // transform the ADD so the immediate doesn't need to be materialized
3568         // in a register.
3569         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3570           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3571                                              SRLI->getZExtValue());
3572           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3573             ADDC |= Mask;
3574             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3575               SDLoc DL0(N0);
3576               SDValue NewAdd =
3577                 DAG.getNode(ISD::ADD, DL0, VT,
3578                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3579               CombineTo(N0.getNode(), NewAdd);
3580               // Return N so it doesn't get rechecked!
3581               return SDValue(N, 0);
3582             }
3583           }
3584         }
3585       }
3586     }
3587   }
3588 
3589   // Reduce bit extract of low half of an integer to the narrower type.
3590   // (and (srl i64:x, K), KMask) ->
3591   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3592   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3593     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3594       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3595         unsigned Size = VT.getSizeInBits();
3596         const APInt &AndMask = CAnd->getAPIntValue();
3597         unsigned ShiftBits = CShift->getZExtValue();
3598 
3599         // Bail out, this node will probably disappear anyway.
3600         if (ShiftBits == 0)
3601           return SDValue();
3602 
3603         unsigned MaskBits = AndMask.countTrailingOnes();
3604         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3605 
3606         if (AndMask.isMask() &&
3607             // Required bits must not span the two halves of the integer and
3608             // must fit in the half size type.
3609             (ShiftBits + MaskBits <= Size / 2) &&
3610             TLI.isNarrowingProfitable(VT, HalfVT) &&
3611             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3612             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3613             TLI.isTruncateFree(VT, HalfVT) &&
3614             TLI.isZExtFree(HalfVT, VT)) {
3615           // The isNarrowingProfitable is to avoid regressions on PPC and
3616           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3617           // on downstream users of this. Those patterns could probably be
3618           // extended to handle extensions mixed in.
3619 
3620           SDValue SL(N0);
3621           assert(MaskBits <= Size);
3622 
3623           // Extracting the highest bit of the low half.
3624           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3625           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3626                                       N0.getOperand(0));
3627 
3628           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3629           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3630           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3631           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3632           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3633         }
3634       }
3635     }
3636   }
3637 
3638   return SDValue();
3639 }
3640 
3641 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3642                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3643                                    bool &NarrowLoad) {
3644   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
3645 
3646   if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
3647     return false;
3648 
3649   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3650   LoadedVT = LoadN->getMemoryVT();
3651 
3652   if (ExtVT == LoadedVT &&
3653       (!LegalOperations ||
3654        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3655     // ZEXTLOAD will match without needing to change the size of the value being
3656     // loaded.
3657     NarrowLoad = false;
3658     return true;
3659   }
3660 
3661   // Do not change the width of a volatile load.
3662   if (LoadN->isVolatile())
3663     return false;
3664 
3665   // Do not generate loads of non-round integer types since these can
3666   // be expensive (and would be wrong if the type is not byte sized).
3667   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3668     return false;
3669 
3670   if (LegalOperations &&
3671       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3672     return false;
3673 
3674   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3675     return false;
3676 
3677   NarrowLoad = true;
3678   return true;
3679 }
3680 
3681 SDValue DAGCombiner::visitAND(SDNode *N) {
3682   SDValue N0 = N->getOperand(0);
3683   SDValue N1 = N->getOperand(1);
3684   EVT VT = N1.getValueType();
3685 
3686   // x & x --> x
3687   if (N0 == N1)
3688     return N0;
3689 
3690   // fold vector ops
3691   if (VT.isVector()) {
3692     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3693       return FoldedVOp;
3694 
3695     // fold (and x, 0) -> 0, vector edition
3696     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3697       // do not return N0, because undef node may exist in N0
3698       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3699                              SDLoc(N), N0.getValueType());
3700     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3701       // do not return N1, because undef node may exist in N1
3702       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3703                              SDLoc(N), N1.getValueType());
3704 
3705     // fold (and x, -1) -> x, vector edition
3706     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3707       return N1;
3708     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3709       return N0;
3710   }
3711 
3712   // fold (and c1, c2) -> c1&c2
3713   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3714   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3715   if (N0C && N1C && !N1C->isOpaque())
3716     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3717   // canonicalize constant to RHS
3718   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3719      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3720     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3721   // fold (and x, -1) -> x
3722   if (isAllOnesConstant(N1))
3723     return N0;
3724   // if (and x, c) is known to be zero, return 0
3725   unsigned BitWidth = VT.getScalarSizeInBits();
3726   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3727                                    APInt::getAllOnesValue(BitWidth)))
3728     return DAG.getConstant(0, SDLoc(N), VT);
3729 
3730   if (SDValue NewSel = foldBinOpIntoSelect(N))
3731     return NewSel;
3732 
3733   // reassociate and
3734   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3735     return RAND;
3736   // fold (and (or x, C), D) -> D if (C & D) == D
3737   if (N1C && N0.getOpcode() == ISD::OR)
3738     if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
3739       if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue()))
3740         return N1;
3741   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3742   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3743     SDValue N0Op0 = N0.getOperand(0);
3744     APInt Mask = ~N1C->getAPIntValue();
3745     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
3746     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
3747       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
3748                                  N0.getValueType(), N0Op0);
3749 
3750       // Replace uses of the AND with uses of the Zero extend node.
3751       CombineTo(N, Zext);
3752 
3753       // We actually want to replace all uses of the any_extend with the
3754       // zero_extend, to avoid duplicating things.  This will later cause this
3755       // AND to be folded.
3756       CombineTo(N0.getNode(), Zext);
3757       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3758     }
3759   }
3760   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
3761   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
3762   // already be zero by virtue of the width of the base type of the load.
3763   //
3764   // the 'X' node here can either be nothing or an extract_vector_elt to catch
3765   // more cases.
3766   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3767        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
3768        N0.getOperand(0).getOpcode() == ISD::LOAD &&
3769        N0.getOperand(0).getResNo() == 0) ||
3770       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
3771     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
3772                                          N0 : N0.getOperand(0) );
3773 
3774     // Get the constant (if applicable) the zero'th operand is being ANDed with.
3775     // This can be a pure constant or a vector splat, in which case we treat the
3776     // vector as a scalar and use the splat value.
3777     APInt Constant = APInt::getNullValue(1);
3778     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
3779       Constant = C->getAPIntValue();
3780     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
3781       APInt SplatValue, SplatUndef;
3782       unsigned SplatBitSize;
3783       bool HasAnyUndefs;
3784       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
3785                                              SplatBitSize, HasAnyUndefs);
3786       if (IsSplat) {
3787         // Undef bits can contribute to a possible optimisation if set, so
3788         // set them.
3789         SplatValue |= SplatUndef;
3790 
3791         // The splat value may be something like "0x00FFFFFF", which means 0 for
3792         // the first vector value and FF for the rest, repeating. We need a mask
3793         // that will apply equally to all members of the vector, so AND all the
3794         // lanes of the constant together.
3795         EVT VT = Vector->getValueType(0);
3796         unsigned BitWidth = VT.getScalarSizeInBits();
3797 
3798         // If the splat value has been compressed to a bitlength lower
3799         // than the size of the vector lane, we need to re-expand it to
3800         // the lane size.
3801         if (BitWidth > SplatBitSize)
3802           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
3803                SplatBitSize < BitWidth;
3804                SplatBitSize = SplatBitSize * 2)
3805             SplatValue |= SplatValue.shl(SplatBitSize);
3806 
3807         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
3808         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
3809         if (SplatBitSize % BitWidth == 0) {
3810           Constant = APInt::getAllOnesValue(BitWidth);
3811           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
3812             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
3813         }
3814       }
3815     }
3816 
3817     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
3818     // actually legal and isn't going to get expanded, else this is a false
3819     // optimisation.
3820     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
3821                                                     Load->getValueType(0),
3822                                                     Load->getMemoryVT());
3823 
3824     // Resize the constant to the same size as the original memory access before
3825     // extension. If it is still the AllOnesValue then this AND is completely
3826     // unneeded.
3827     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
3828 
3829     bool B;
3830     switch (Load->getExtensionType()) {
3831     default: B = false; break;
3832     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
3833     case ISD::ZEXTLOAD:
3834     case ISD::NON_EXTLOAD: B = true; break;
3835     }
3836 
3837     if (B && Constant.isAllOnesValue()) {
3838       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
3839       // preserve semantics once we get rid of the AND.
3840       SDValue NewLoad(Load, 0);
3841 
3842       // Fold the AND away. NewLoad may get replaced immediately.
3843       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
3844 
3845       if (Load->getExtensionType() == ISD::EXTLOAD) {
3846         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
3847                               Load->getValueType(0), SDLoc(Load),
3848                               Load->getChain(), Load->getBasePtr(),
3849                               Load->getOffset(), Load->getMemoryVT(),
3850                               Load->getMemOperand());
3851         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
3852         if (Load->getNumValues() == 3) {
3853           // PRE/POST_INC loads have 3 values.
3854           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
3855                            NewLoad.getValue(2) };
3856           CombineTo(Load, To, 3, true);
3857         } else {
3858           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
3859         }
3860       }
3861 
3862       return SDValue(N, 0); // Return N so it doesn't get rechecked!
3863     }
3864   }
3865 
3866   // fold (and (load x), 255) -> (zextload x, i8)
3867   // fold (and (extload x, i16), 255) -> (zextload x, i8)
3868   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
3869   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
3870                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
3871                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
3872     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
3873     LoadSDNode *LN0 = HasAnyExt
3874       ? cast<LoadSDNode>(N0.getOperand(0))
3875       : cast<LoadSDNode>(N0);
3876     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
3877         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
3878       auto NarrowLoad = false;
3879       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
3880       EVT ExtVT, LoadedVT;
3881       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
3882                            NarrowLoad)) {
3883         if (!NarrowLoad) {
3884           SDValue NewLoad =
3885             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3886                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
3887                            LN0->getMemOperand());
3888           AddToWorklist(N);
3889           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
3890           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3891         } else {
3892           EVT PtrType = LN0->getOperand(1).getValueType();
3893 
3894           unsigned Alignment = LN0->getAlignment();
3895           SDValue NewPtr = LN0->getBasePtr();
3896 
3897           // For big endian targets, we need to add an offset to the pointer
3898           // to load the correct bytes.  For little endian systems, we merely
3899           // need to read fewer bytes from the same pointer.
3900           if (DAG.getDataLayout().isBigEndian()) {
3901             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3902             unsigned EVTStoreBytes = ExtVT.getStoreSize();
3903             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3904             SDLoc DL(LN0);
3905             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
3906                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
3907             Alignment = MinAlign(Alignment, PtrOff);
3908           }
3909 
3910           AddToWorklist(NewPtr.getNode());
3911 
3912           SDValue Load = DAG.getExtLoad(
3913               ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
3914               LN0->getPointerInfo(), ExtVT, Alignment,
3915               LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
3916           AddToWorklist(N);
3917           CombineTo(LN0, Load, Load.getValue(1));
3918           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3919         }
3920       }
3921     }
3922   }
3923 
3924   if (SDValue Combined = visitANDLike(N0, N1, N))
3925     return Combined;
3926 
3927   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
3928   if (N0.getOpcode() == N1.getOpcode())
3929     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3930       return Tmp;
3931 
3932   // Masking the negated extension of a boolean is just the zero-extended
3933   // boolean:
3934   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
3935   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
3936   //
3937   // Note: the SimplifyDemandedBits fold below can make an information-losing
3938   // transform, and then we have no way to find this better fold.
3939   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
3940     if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
3941       SDValue SubRHS = N0.getOperand(1);
3942       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
3943           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3944         return SubRHS;
3945       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
3946           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3947         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
3948     }
3949   }
3950 
3951   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
3952   // fold (and (sra)) -> (and (srl)) when possible.
3953   if (SimplifyDemandedBits(SDValue(N, 0)))
3954     return SDValue(N, 0);
3955 
3956   // fold (zext_inreg (extload x)) -> (zextload x)
3957   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
3958     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3959     EVT MemVT = LN0->getMemoryVT();
3960     // If we zero all the possible extended bits, then we can turn this into
3961     // a zextload if we are running before legalize or the operation is legal.
3962     unsigned BitWidth = N1.getScalarValueSizeInBits();
3963     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3964                            BitWidth - MemVT.getScalarSizeInBits())) &&
3965         ((!LegalOperations && !LN0->isVolatile()) ||
3966          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3967       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3968                                        LN0->getChain(), LN0->getBasePtr(),
3969                                        MemVT, LN0->getMemOperand());
3970       AddToWorklist(N);
3971       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3972       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3973     }
3974   }
3975   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
3976   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
3977       N0.hasOneUse()) {
3978     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3979     EVT MemVT = LN0->getMemoryVT();
3980     // If we zero all the possible extended bits, then we can turn this into
3981     // a zextload if we are running before legalize or the operation is legal.
3982     unsigned BitWidth = N1.getScalarValueSizeInBits();
3983     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3984                            BitWidth - MemVT.getScalarSizeInBits())) &&
3985         ((!LegalOperations && !LN0->isVolatile()) ||
3986          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3987       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3988                                        LN0->getChain(), LN0->getBasePtr(),
3989                                        MemVT, LN0->getMemOperand());
3990       AddToWorklist(N);
3991       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3992       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3993     }
3994   }
3995   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
3996   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
3997     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
3998                                            N0.getOperand(1), false))
3999       return BSwap;
4000   }
4001 
4002   return SDValue();
4003 }
4004 
4005 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4006 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4007                                         bool DemandHighBits) {
4008   if (!LegalOperations)
4009     return SDValue();
4010 
4011   EVT VT = N->getValueType(0);
4012   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4013     return SDValue();
4014   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4015     return SDValue();
4016 
4017   // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
4018   bool LookPassAnd0 = false;
4019   bool LookPassAnd1 = false;
4020   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4021       std::swap(N0, N1);
4022   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4023       std::swap(N0, N1);
4024   if (N0.getOpcode() == ISD::AND) {
4025     if (!N0.getNode()->hasOneUse())
4026       return SDValue();
4027     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4028     if (!N01C || N01C->getZExtValue() != 0xFF00)
4029       return SDValue();
4030     N0 = N0.getOperand(0);
4031     LookPassAnd0 = true;
4032   }
4033 
4034   if (N1.getOpcode() == ISD::AND) {
4035     if (!N1.getNode()->hasOneUse())
4036       return SDValue();
4037     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4038     if (!N11C || N11C->getZExtValue() != 0xFF)
4039       return SDValue();
4040     N1 = N1.getOperand(0);
4041     LookPassAnd1 = true;
4042   }
4043 
4044   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4045     std::swap(N0, N1);
4046   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4047     return SDValue();
4048   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4049     return SDValue();
4050 
4051   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4052   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4053   if (!N01C || !N11C)
4054     return SDValue();
4055   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4056     return SDValue();
4057 
4058   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4059   SDValue N00 = N0->getOperand(0);
4060   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4061     if (!N00.getNode()->hasOneUse())
4062       return SDValue();
4063     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4064     if (!N001C || N001C->getZExtValue() != 0xFF)
4065       return SDValue();
4066     N00 = N00.getOperand(0);
4067     LookPassAnd0 = true;
4068   }
4069 
4070   SDValue N10 = N1->getOperand(0);
4071   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4072     if (!N10.getNode()->hasOneUse())
4073       return SDValue();
4074     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
4075     if (!N101C || N101C->getZExtValue() != 0xFF00)
4076       return SDValue();
4077     N10 = N10.getOperand(0);
4078     LookPassAnd1 = true;
4079   }
4080 
4081   if (N00 != N10)
4082     return SDValue();
4083 
4084   // Make sure everything beyond the low halfword gets set to zero since the SRL
4085   // 16 will clear the top bits.
4086   unsigned OpSizeInBits = VT.getSizeInBits();
4087   if (DemandHighBits && OpSizeInBits > 16) {
4088     // If the left-shift isn't masked out then the only way this is a bswap is
4089     // if all bits beyond the low 8 are 0. In that case the entire pattern
4090     // reduces to a left shift anyway: leave it for other parts of the combiner.
4091     if (!LookPassAnd0)
4092       return SDValue();
4093 
4094     // However, if the right shift isn't masked out then it might be because
4095     // it's not needed. See if we can spot that too.
4096     if (!LookPassAnd1 &&
4097         !DAG.MaskedValueIsZero(
4098             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4099       return SDValue();
4100   }
4101 
4102   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4103   if (OpSizeInBits > 16) {
4104     SDLoc DL(N);
4105     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4106                       DAG.getConstant(OpSizeInBits - 16, DL,
4107                                       getShiftAmountTy(VT)));
4108   }
4109   return Res;
4110 }
4111 
4112 /// Return true if the specified node is an element that makes up a 32-bit
4113 /// packed halfword byteswap.
4114 /// ((x & 0x000000ff) << 8) |
4115 /// ((x & 0x0000ff00) >> 8) |
4116 /// ((x & 0x00ff0000) << 8) |
4117 /// ((x & 0xff000000) >> 8)
4118 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4119   if (!N.getNode()->hasOneUse())
4120     return false;
4121 
4122   unsigned Opc = N.getOpcode();
4123   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4124     return false;
4125 
4126   SDValue N0 = N.getOperand(0);
4127   unsigned Opc0 = N0.getOpcode();
4128   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4129     return false;
4130 
4131   ConstantSDNode *N1C = nullptr;
4132   // SHL or SRL: look upstream for AND mask operand
4133   if (Opc == ISD::AND)
4134     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4135   else if (Opc0 == ISD::AND)
4136     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4137   if (!N1C)
4138     return false;
4139 
4140   unsigned MaskByteOffset;
4141   switch (N1C->getZExtValue()) {
4142   default:
4143     return false;
4144   case 0xFF:       MaskByteOffset = 0; break;
4145   case 0xFF00:     MaskByteOffset = 1; break;
4146   case 0xFF0000:   MaskByteOffset = 2; break;
4147   case 0xFF000000: MaskByteOffset = 3; break;
4148   }
4149 
4150   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4151   if (Opc == ISD::AND) {
4152     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4153       // (x >> 8) & 0xff
4154       // (x >> 8) & 0xff0000
4155       if (Opc0 != ISD::SRL)
4156         return false;
4157       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4158       if (!C || C->getZExtValue() != 8)
4159         return false;
4160     } else {
4161       // (x << 8) & 0xff00
4162       // (x << 8) & 0xff000000
4163       if (Opc0 != ISD::SHL)
4164         return false;
4165       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4166       if (!C || C->getZExtValue() != 8)
4167         return false;
4168     }
4169   } else if (Opc == ISD::SHL) {
4170     // (x & 0xff) << 8
4171     // (x & 0xff0000) << 8
4172     if (MaskByteOffset != 0 && MaskByteOffset != 2)
4173       return false;
4174     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4175     if (!C || C->getZExtValue() != 8)
4176       return false;
4177   } else { // Opc == ISD::SRL
4178     // (x & 0xff00) >> 8
4179     // (x & 0xff000000) >> 8
4180     if (MaskByteOffset != 1 && MaskByteOffset != 3)
4181       return false;
4182     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4183     if (!C || C->getZExtValue() != 8)
4184       return false;
4185   }
4186 
4187   if (Parts[MaskByteOffset])
4188     return false;
4189 
4190   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4191   return true;
4192 }
4193 
4194 /// Match a 32-bit packed halfword bswap. That is
4195 /// ((x & 0x000000ff) << 8) |
4196 /// ((x & 0x0000ff00) >> 8) |
4197 /// ((x & 0x00ff0000) << 8) |
4198 /// ((x & 0xff000000) >> 8)
4199 /// => (rotl (bswap x), 16)
4200 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4201   if (!LegalOperations)
4202     return SDValue();
4203 
4204   EVT VT = N->getValueType(0);
4205   if (VT != MVT::i32)
4206     return SDValue();
4207   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4208     return SDValue();
4209 
4210   // Look for either
4211   // (or (or (and), (and)), (or (and), (and)))
4212   // (or (or (or (and), (and)), (and)), (and))
4213   if (N0.getOpcode() != ISD::OR)
4214     return SDValue();
4215   SDValue N00 = N0.getOperand(0);
4216   SDValue N01 = N0.getOperand(1);
4217   SDNode *Parts[4] = {};
4218 
4219   if (N1.getOpcode() == ISD::OR &&
4220       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4221     // (or (or (and), (and)), (or (and), (and)))
4222     if (!isBSwapHWordElement(N00, Parts))
4223       return SDValue();
4224 
4225     if (!isBSwapHWordElement(N01, Parts))
4226       return SDValue();
4227     SDValue N10 = N1.getOperand(0);
4228     if (!isBSwapHWordElement(N10, Parts))
4229       return SDValue();
4230     SDValue N11 = N1.getOperand(1);
4231     if (!isBSwapHWordElement(N11, Parts))
4232       return SDValue();
4233   } else {
4234     // (or (or (or (and), (and)), (and)), (and))
4235     if (!isBSwapHWordElement(N1, Parts))
4236       return SDValue();
4237     if (!isBSwapHWordElement(N01, Parts))
4238       return SDValue();
4239     if (N00.getOpcode() != ISD::OR)
4240       return SDValue();
4241     SDValue N000 = N00.getOperand(0);
4242     if (!isBSwapHWordElement(N000, Parts))
4243       return SDValue();
4244     SDValue N001 = N00.getOperand(1);
4245     if (!isBSwapHWordElement(N001, Parts))
4246       return SDValue();
4247   }
4248 
4249   // Make sure the parts are all coming from the same node.
4250   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4251     return SDValue();
4252 
4253   SDLoc DL(N);
4254   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4255                               SDValue(Parts[0], 0));
4256 
4257   // Result of the bswap should be rotated by 16. If it's not legal, then
4258   // do  (x << 16) | (x >> 16).
4259   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4260   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4261     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4262   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4263     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4264   return DAG.getNode(ISD::OR, DL, VT,
4265                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4266                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4267 }
4268 
4269 /// This contains all DAGCombine rules which reduce two values combined by
4270 /// an Or operation to a single value \see visitANDLike().
4271 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4272   EVT VT = N1.getValueType();
4273   SDLoc DL(N);
4274 
4275   // fold (or x, undef) -> -1
4276   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4277     return DAG.getAllOnesConstant(DL, VT);
4278 
4279   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4280     return V;
4281 
4282   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4283   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4284       // Don't increase # computations.
4285       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4286     // We can only do this xform if we know that bits from X that are set in C2
4287     // but not in C1 are already zero.  Likewise for Y.
4288     if (const ConstantSDNode *N0O1C =
4289         getAsNonOpaqueConstant(N0.getOperand(1))) {
4290       if (const ConstantSDNode *N1O1C =
4291           getAsNonOpaqueConstant(N1.getOperand(1))) {
4292         // We can only do this xform if we know that bits from X that are set in
4293         // C2 but not in C1 are already zero.  Likewise for Y.
4294         const APInt &LHSMask = N0O1C->getAPIntValue();
4295         const APInt &RHSMask = N1O1C->getAPIntValue();
4296 
4297         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4298             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4299           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4300                                   N0.getOperand(0), N1.getOperand(0));
4301           return DAG.getNode(ISD::AND, DL, VT, X,
4302                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4303         }
4304       }
4305     }
4306   }
4307 
4308   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4309   if (N0.getOpcode() == ISD::AND &&
4310       N1.getOpcode() == ISD::AND &&
4311       N0.getOperand(0) == N1.getOperand(0) &&
4312       // Don't increase # computations.
4313       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4314     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4315                             N0.getOperand(1), N1.getOperand(1));
4316     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4317   }
4318 
4319   return SDValue();
4320 }
4321 
4322 SDValue DAGCombiner::visitOR(SDNode *N) {
4323   SDValue N0 = N->getOperand(0);
4324   SDValue N1 = N->getOperand(1);
4325   EVT VT = N1.getValueType();
4326 
4327   // x | x --> x
4328   if (N0 == N1)
4329     return N0;
4330 
4331   // fold vector ops
4332   if (VT.isVector()) {
4333     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4334       return FoldedVOp;
4335 
4336     // fold (or x, 0) -> x, vector edition
4337     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4338       return N1;
4339     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4340       return N0;
4341 
4342     // fold (or x, -1) -> -1, vector edition
4343     if (ISD::isBuildVectorAllOnes(N0.getNode()))
4344       // do not return N0, because undef node may exist in N0
4345       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
4346     if (ISD::isBuildVectorAllOnes(N1.getNode()))
4347       // do not return N1, because undef node may exist in N1
4348       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
4349 
4350     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
4351     // Do this only if the resulting shuffle is legal.
4352     if (isa<ShuffleVectorSDNode>(N0) &&
4353         isa<ShuffleVectorSDNode>(N1) &&
4354         // Avoid folding a node with illegal type.
4355         TLI.isTypeLegal(VT)) {
4356       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
4357       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
4358       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4359       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
4360       // Ensure both shuffles have a zero input.
4361       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
4362         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
4363         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
4364         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
4365         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
4366         bool CanFold = true;
4367         int NumElts = VT.getVectorNumElements();
4368         SmallVector<int, 4> Mask(NumElts);
4369 
4370         for (int i = 0; i != NumElts; ++i) {
4371           int M0 = SV0->getMaskElt(i);
4372           int M1 = SV1->getMaskElt(i);
4373 
4374           // Determine if either index is pointing to a zero vector.
4375           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
4376           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
4377 
4378           // If one element is zero and the otherside is undef, keep undef.
4379           // This also handles the case that both are undef.
4380           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
4381             Mask[i] = -1;
4382             continue;
4383           }
4384 
4385           // Make sure only one of the elements is zero.
4386           if (M0Zero == M1Zero) {
4387             CanFold = false;
4388             break;
4389           }
4390 
4391           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
4392 
4393           // We have a zero and non-zero element. If the non-zero came from
4394           // SV0 make the index a LHS index. If it came from SV1, make it
4395           // a RHS index. We need to mod by NumElts because we don't care
4396           // which operand it came from in the original shuffles.
4397           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
4398         }
4399 
4400         if (CanFold) {
4401           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
4402           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
4403 
4404           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4405           if (!LegalMask) {
4406             std::swap(NewLHS, NewRHS);
4407             ShuffleVectorSDNode::commuteMask(Mask);
4408             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4409           }
4410 
4411           if (LegalMask)
4412             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
4413         }
4414       }
4415     }
4416   }
4417 
4418   // fold (or c1, c2) -> c1|c2
4419   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4420   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4421   if (N0C && N1C && !N1C->isOpaque())
4422     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
4423   // canonicalize constant to RHS
4424   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4425      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4426     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
4427   // fold (or x, 0) -> x
4428   if (isNullConstant(N1))
4429     return N0;
4430   // fold (or x, -1) -> -1
4431   if (isAllOnesConstant(N1))
4432     return N1;
4433 
4434   if (SDValue NewSel = foldBinOpIntoSelect(N))
4435     return NewSel;
4436 
4437   // fold (or x, c) -> c iff (x & ~c) == 0
4438   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
4439     return N1;
4440 
4441   if (SDValue Combined = visitORLike(N0, N1, N))
4442     return Combined;
4443 
4444   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
4445   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
4446     return BSwap;
4447   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
4448     return BSwap;
4449 
4450   // reassociate or
4451   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
4452     return ROR;
4453 
4454   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
4455   // iff (c1 & c2) != 0.
4456   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) {
4457     if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4458       if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) {
4459         if (SDValue COR =
4460                 DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1))
4461           return DAG.getNode(
4462               ISD::AND, SDLoc(N), VT,
4463               DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
4464         return SDValue();
4465       }
4466     }
4467   }
4468 
4469   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
4470   if (N0.getOpcode() == N1.getOpcode())
4471     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4472       return Tmp;
4473 
4474   // See if this is some rotate idiom.
4475   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
4476     return SDValue(Rot, 0);
4477 
4478   if (SDValue Load = MatchLoadCombine(N))
4479     return Load;
4480 
4481   // Simplify the operands using demanded-bits information.
4482   if (SimplifyDemandedBits(SDValue(N, 0)))
4483     return SDValue(N, 0);
4484 
4485   return SDValue();
4486 }
4487 
4488 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4489 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4490   if (Op.getOpcode() == ISD::AND) {
4491     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4492       Mask = Op.getOperand(1);
4493       Op = Op.getOperand(0);
4494     } else {
4495       return false;
4496     }
4497   }
4498 
4499   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4500     Shift = Op;
4501     return true;
4502   }
4503 
4504   return false;
4505 }
4506 
4507 // Return true if we can prove that, whenever Neg and Pos are both in the
4508 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
4509 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
4510 //
4511 //     (or (shift1 X, Neg), (shift2 X, Pos))
4512 //
4513 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
4514 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
4515 // to consider shift amounts with defined behavior.
4516 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
4517   // If EltSize is a power of 2 then:
4518   //
4519   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4520   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4521   //
4522   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4523   // for the stronger condition:
4524   //
4525   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
4526   //
4527   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4528   // we can just replace Neg with Neg' for the rest of the function.
4529   //
4530   // In other cases we check for the even stronger condition:
4531   //
4532   //     Neg == EltSize - Pos                                    [B]
4533   //
4534   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
4535   // behavior if Pos == 0 (and consequently Neg == EltSize).
4536   //
4537   // We could actually use [A] whenever EltSize is a power of 2, but the
4538   // only extra cases that it would match are those uninteresting ones
4539   // where Neg and Pos are never in range at the same time.  E.g. for
4540   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4541   // as well as (sub 32, Pos), but:
4542   //
4543   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4544   //
4545   // always invokes undefined behavior for 32-bit X.
4546   //
4547   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4548   unsigned MaskLoBits = 0;
4549   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4550     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4551       if (NegC->getAPIntValue() == EltSize - 1) {
4552         Neg = Neg.getOperand(0);
4553         MaskLoBits = Log2_64(EltSize);
4554       }
4555     }
4556   }
4557 
4558   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4559   if (Neg.getOpcode() != ISD::SUB)
4560     return false;
4561   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
4562   if (!NegC)
4563     return false;
4564   SDValue NegOp1 = Neg.getOperand(1);
4565 
4566   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4567   // Pos'.  The truncation is redundant for the purpose of the equality.
4568   if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4569     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4570       if (PosC->getAPIntValue() == EltSize - 1)
4571         Pos = Pos.getOperand(0);
4572 
4573   // The condition we need is now:
4574   //
4575   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4576   //
4577   // If NegOp1 == Pos then we need:
4578   //
4579   //              EltSize & Mask == NegC & Mask
4580   //
4581   // (because "x & Mask" is a truncation and distributes through subtraction).
4582   APInt Width;
4583   if (Pos == NegOp1)
4584     Width = NegC->getAPIntValue();
4585 
4586   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4587   // Then the condition we want to prove becomes:
4588   //
4589   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4590   //
4591   // which, again because "x & Mask" is a truncation, becomes:
4592   //
4593   //                NegC & Mask == (EltSize - PosC) & Mask
4594   //             EltSize & Mask == (NegC + PosC) & Mask
4595   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4596     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4597       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4598     else
4599       return false;
4600   } else
4601     return false;
4602 
4603   // Now we just need to check that EltSize & Mask == Width & Mask.
4604   if (MaskLoBits)
4605     // EltSize & Mask is 0 since Mask is EltSize - 1.
4606     return Width.getLoBits(MaskLoBits) == 0;
4607   return Width == EltSize;
4608 }
4609 
4610 // A subroutine of MatchRotate used once we have found an OR of two opposite
4611 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4612 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4613 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4614 // Neg with outer conversions stripped away.
4615 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4616                                        SDValue Neg, SDValue InnerPos,
4617                                        SDValue InnerNeg, unsigned PosOpcode,
4618                                        unsigned NegOpcode, const SDLoc &DL) {
4619   // fold (or (shl x, (*ext y)),
4620   //          (srl x, (*ext (sub 32, y)))) ->
4621   //   (rotl x, y) or (rotr x, (sub 32, y))
4622   //
4623   // fold (or (shl x, (*ext (sub 32, y))),
4624   //          (srl x, (*ext y))) ->
4625   //   (rotr x, y) or (rotl x, (sub 32, y))
4626   EVT VT = Shifted.getValueType();
4627   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4628     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4629     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4630                        HasPos ? Pos : Neg).getNode();
4631   }
4632 
4633   return nullptr;
4634 }
4635 
4636 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
4637 // idioms for rotate, and if the target supports rotation instructions, generate
4638 // a rot[lr].
4639 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
4640   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
4641   EVT VT = LHS.getValueType();
4642   if (!TLI.isTypeLegal(VT)) return nullptr;
4643 
4644   // The target must have at least one rotate flavor.
4645   bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
4646   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
4647   if (!HasROTL && !HasROTR) return nullptr;
4648 
4649   // Match "(X shl/srl V1) & V2" where V2 may not be present.
4650   SDValue LHSShift;   // The shift.
4651   SDValue LHSMask;    // AND value if any.
4652   if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
4653     return nullptr; // Not part of a rotate.
4654 
4655   SDValue RHSShift;   // The shift.
4656   SDValue RHSMask;    // AND value if any.
4657   if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
4658     return nullptr; // Not part of a rotate.
4659 
4660   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
4661     return nullptr;   // Not shifting the same value.
4662 
4663   if (LHSShift.getOpcode() == RHSShift.getOpcode())
4664     return nullptr;   // Shifts must disagree.
4665 
4666   // Canonicalize shl to left side in a shl/srl pair.
4667   if (RHSShift.getOpcode() == ISD::SHL) {
4668     std::swap(LHS, RHS);
4669     std::swap(LHSShift, RHSShift);
4670     std::swap(LHSMask, RHSMask);
4671   }
4672 
4673   unsigned EltSizeInBits = VT.getScalarSizeInBits();
4674   SDValue LHSShiftArg = LHSShift.getOperand(0);
4675   SDValue LHSShiftAmt = LHSShift.getOperand(1);
4676   SDValue RHSShiftArg = RHSShift.getOperand(0);
4677   SDValue RHSShiftAmt = RHSShift.getOperand(1);
4678 
4679   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
4680   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
4681   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
4682                                         ConstantSDNode *RHS) {
4683     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
4684   };
4685   if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
4686     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
4687                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
4688 
4689     // If there is an AND of either shifted operand, apply it to the result.
4690     if (LHSMask.getNode() || RHSMask.getNode()) {
4691       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4692       SDValue Mask = AllOnes;
4693 
4694       if (LHSMask.getNode()) {
4695         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
4696         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4697                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
4698       }
4699       if (RHSMask.getNode()) {
4700         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
4701         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4702                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
4703       }
4704 
4705       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
4706     }
4707 
4708     return Rot.getNode();
4709   }
4710 
4711   // If there is a mask here, and we have a variable shift, we can't be sure
4712   // that we're masking out the right stuff.
4713   if (LHSMask.getNode() || RHSMask.getNode())
4714     return nullptr;
4715 
4716   // If the shift amount is sign/zext/any-extended just peel it off.
4717   SDValue LExtOp0 = LHSShiftAmt;
4718   SDValue RExtOp0 = RHSShiftAmt;
4719   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4720        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4721        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4722        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
4723       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4724        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4725        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4726        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
4727     LExtOp0 = LHSShiftAmt.getOperand(0);
4728     RExtOp0 = RHSShiftAmt.getOperand(0);
4729   }
4730 
4731   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
4732                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
4733   if (TryL)
4734     return TryL;
4735 
4736   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
4737                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
4738   if (TryR)
4739     return TryR;
4740 
4741   return nullptr;
4742 }
4743 
4744 namespace {
4745 /// Represents known origin of an individual byte in load combine pattern. The
4746 /// value of the byte is either constant zero or comes from memory.
4747 struct ByteProvider {
4748   // For constant zero providers Load is set to nullptr. For memory providers
4749   // Load represents the node which loads the byte from memory.
4750   // ByteOffset is the offset of the byte in the value produced by the load.
4751   LoadSDNode *Load;
4752   unsigned ByteOffset;
4753 
4754   ByteProvider() : Load(nullptr), ByteOffset(0) {}
4755 
4756   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
4757     return ByteProvider(Load, ByteOffset);
4758   }
4759   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
4760 
4761   bool isConstantZero() const { return !Load; }
4762   bool isMemory() const { return Load; }
4763 
4764   bool operator==(const ByteProvider &Other) const {
4765     return Other.Load == Load && Other.ByteOffset == ByteOffset;
4766   }
4767 
4768 private:
4769   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
4770       : Load(Load), ByteOffset(ByteOffset) {}
4771 };
4772 
4773 /// Recursively traverses the expression calculating the origin of the requested
4774 /// byte of the given value. Returns None if the provider can't be calculated.
4775 ///
4776 /// For all the values except the root of the expression verifies that the value
4777 /// has exactly one use and if it's not true return None. This way if the origin
4778 /// of the byte is returned it's guaranteed that the values which contribute to
4779 /// the byte are not used outside of this expression.
4780 ///
4781 /// Because the parts of the expression are not allowed to have more than one
4782 /// use this function iterates over trees, not DAGs. So it never visits the same
4783 /// node more than once.
4784 const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
4785                                                    unsigned Depth,
4786                                                    bool Root = false) {
4787   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
4788   if (Depth == 10)
4789     return None;
4790 
4791   if (!Root && !Op.hasOneUse())
4792     return None;
4793 
4794   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
4795   unsigned BitWidth = Op.getValueSizeInBits();
4796   if (BitWidth % 8 != 0)
4797     return None;
4798   unsigned ByteWidth = BitWidth / 8;
4799   assert(Index < ByteWidth && "invalid index requested");
4800   (void) ByteWidth;
4801 
4802   switch (Op.getOpcode()) {
4803   case ISD::OR: {
4804     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
4805     if (!LHS)
4806       return None;
4807     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
4808     if (!RHS)
4809       return None;
4810 
4811     if (LHS->isConstantZero())
4812       return RHS;
4813     if (RHS->isConstantZero())
4814       return LHS;
4815     return None;
4816   }
4817   case ISD::SHL: {
4818     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
4819     if (!ShiftOp)
4820       return None;
4821 
4822     uint64_t BitShift = ShiftOp->getZExtValue();
4823     if (BitShift % 8 != 0)
4824       return None;
4825     uint64_t ByteShift = BitShift / 8;
4826 
4827     return Index < ByteShift
4828                ? ByteProvider::getConstantZero()
4829                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
4830                                        Depth + 1);
4831   }
4832   case ISD::ANY_EXTEND:
4833   case ISD::SIGN_EXTEND:
4834   case ISD::ZERO_EXTEND: {
4835     SDValue NarrowOp = Op->getOperand(0);
4836     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
4837     if (NarrowBitWidth % 8 != 0)
4838       return None;
4839     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4840 
4841     if (Index >= NarrowByteWidth)
4842       return Op.getOpcode() == ISD::ZERO_EXTEND
4843                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4844                  : None;
4845     return calculateByteProvider(NarrowOp, Index, Depth + 1);
4846   }
4847   case ISD::BSWAP:
4848     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
4849                                  Depth + 1);
4850   case ISD::LOAD: {
4851     auto L = cast<LoadSDNode>(Op.getNode());
4852     if (L->isVolatile() || L->isIndexed())
4853       return None;
4854 
4855     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
4856     if (NarrowBitWidth % 8 != 0)
4857       return None;
4858     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4859 
4860     if (Index >= NarrowByteWidth)
4861       return L->getExtensionType() == ISD::ZEXTLOAD
4862                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4863                  : None;
4864     return ByteProvider::getMemory(L, Index);
4865   }
4866   }
4867 
4868   return None;
4869 }
4870 } // namespace
4871 
4872 /// Match a pattern where a wide type scalar value is loaded by several narrow
4873 /// loads and combined by shifts and ors. Fold it into a single load or a load
4874 /// and a BSWAP if the targets supports it.
4875 ///
4876 /// Assuming little endian target:
4877 ///  i8 *a = ...
4878 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4879 /// =>
4880 ///  i32 val = *((i32)a)
4881 ///
4882 ///  i8 *a = ...
4883 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4884 /// =>
4885 ///  i32 val = BSWAP(*((i32)a))
4886 ///
4887 /// TODO: This rule matches complex patterns with OR node roots and doesn't
4888 /// interact well with the worklist mechanism. When a part of the pattern is
4889 /// updated (e.g. one of the loads) its direct users are put into the worklist,
4890 /// but the root node of the pattern which triggers the load combine is not
4891 /// necessarily a direct user of the changed node. For example, once the address
4892 /// of t28 load is reassociated load combine won't be triggered:
4893 ///             t25: i32 = add t4, Constant:i32<2>
4894 ///           t26: i64 = sign_extend t25
4895 ///        t27: i64 = add t2, t26
4896 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
4897 ///     t29: i32 = zero_extend t28
4898 ///   t32: i32 = shl t29, Constant:i8<8>
4899 /// t33: i32 = or t23, t32
4900 /// As a possible fix visitLoad can check if the load can be a part of a load
4901 /// combine pattern and add corresponding OR roots to the worklist.
4902 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
4903   assert(N->getOpcode() == ISD::OR &&
4904          "Can only match load combining against OR nodes");
4905 
4906   // Handles simple types only
4907   EVT VT = N->getValueType(0);
4908   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
4909     return SDValue();
4910   unsigned ByteWidth = VT.getSizeInBits() / 8;
4911 
4912   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4913   // Before legalize we can introduce too wide illegal loads which will be later
4914   // split into legal sized loads. This enables us to combine i64 load by i8
4915   // patterns to a couple of i32 loads on 32 bit targets.
4916   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
4917     return SDValue();
4918 
4919   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
4920     unsigned BW, unsigned i) { return i; };
4921   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
4922     unsigned BW, unsigned i) { return BW - i - 1; };
4923 
4924   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
4925   auto MemoryByteOffset = [&] (ByteProvider P) {
4926     assert(P.isMemory() && "Must be a memory byte provider");
4927     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
4928     assert(LoadBitWidth % 8 == 0 &&
4929            "can only analyze providers for individual bytes not bit");
4930     unsigned LoadByteWidth = LoadBitWidth / 8;
4931     return IsBigEndianTarget
4932             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
4933             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
4934   };
4935 
4936   Optional<BaseIndexOffset> Base;
4937   SDValue Chain;
4938 
4939   SmallSet<LoadSDNode *, 8> Loads;
4940   Optional<ByteProvider> FirstByteProvider;
4941   int64_t FirstOffset = INT64_MAX;
4942 
4943   // Check if all the bytes of the OR we are looking at are loaded from the same
4944   // base address. Collect bytes offsets from Base address in ByteOffsets.
4945   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
4946   for (unsigned i = 0; i < ByteWidth; i++) {
4947     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
4948     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
4949       return SDValue();
4950 
4951     LoadSDNode *L = P->Load;
4952     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
4953            "Must be enforced by calculateByteProvider");
4954     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
4955 
4956     // All loads must share the same chain
4957     SDValue LChain = L->getChain();
4958     if (!Chain)
4959       Chain = LChain;
4960     else if (Chain != LChain)
4961       return SDValue();
4962 
4963     // Loads must share the same base address
4964     BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
4965     int64_t ByteOffsetFromBase = 0;
4966     if (!Base)
4967       Base = Ptr;
4968     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
4969       return SDValue();
4970 
4971     // Calculate the offset of the current byte from the base address
4972     ByteOffsetFromBase += MemoryByteOffset(*P);
4973     ByteOffsets[i] = ByteOffsetFromBase;
4974 
4975     // Remember the first byte load
4976     if (ByteOffsetFromBase < FirstOffset) {
4977       FirstByteProvider = P;
4978       FirstOffset = ByteOffsetFromBase;
4979     }
4980 
4981     Loads.insert(L);
4982   }
4983   assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
4984          "memory, so there must be at least one load which produces the value");
4985   assert(Base && "Base address of the accessed memory location must be set");
4986   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
4987 
4988   // Check if the bytes of the OR we are looking at match with either big or
4989   // little endian value load
4990   bool BigEndian = true, LittleEndian = true;
4991   for (unsigned i = 0; i < ByteWidth; i++) {
4992     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
4993     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
4994     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
4995     if (!BigEndian && !LittleEndian)
4996       return SDValue();
4997   }
4998   assert((BigEndian != LittleEndian) && "should be either or");
4999   assert(FirstByteProvider && "must be set");
5000 
5001   // Ensure that the first byte is loaded from zero offset of the first load.
5002   // So the combined value can be loaded from the first load address.
5003   if (MemoryByteOffset(*FirstByteProvider) != 0)
5004     return SDValue();
5005   LoadSDNode *FirstLoad = FirstByteProvider->Load;
5006 
5007   // The node we are looking at matches with the pattern, check if we can
5008   // replace it with a single load and bswap if needed.
5009 
5010   // If the load needs byte swap check if the target supports it
5011   bool NeedsBswap = IsBigEndianTarget != BigEndian;
5012 
5013   // Before legalize we can introduce illegal bswaps which will be later
5014   // converted to an explicit bswap sequence. This way we end up with a single
5015   // load and byte shuffling instead of several loads and byte shuffling.
5016   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
5017     return SDValue();
5018 
5019   // Check that a load of the wide type is both allowed and fast on the target
5020   bool Fast = false;
5021   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
5022                                         VT, FirstLoad->getAddressSpace(),
5023                                         FirstLoad->getAlignment(), &Fast);
5024   if (!Allowed || !Fast)
5025     return SDValue();
5026 
5027   SDValue NewLoad =
5028       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
5029                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
5030 
5031   // Transfer chain users from old loads to the new load.
5032   for (LoadSDNode *L : Loads)
5033     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
5034 
5035   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
5036 }
5037 
5038 SDValue DAGCombiner::visitXOR(SDNode *N) {
5039   SDValue N0 = N->getOperand(0);
5040   SDValue N1 = N->getOperand(1);
5041   EVT VT = N0.getValueType();
5042 
5043   // fold vector ops
5044   if (VT.isVector()) {
5045     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5046       return FoldedVOp;
5047 
5048     // fold (xor x, 0) -> x, vector edition
5049     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5050       return N1;
5051     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5052       return N0;
5053   }
5054 
5055   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
5056   if (N0.isUndef() && N1.isUndef())
5057     return DAG.getConstant(0, SDLoc(N), VT);
5058   // fold (xor x, undef) -> undef
5059   if (N0.isUndef())
5060     return N0;
5061   if (N1.isUndef())
5062     return N1;
5063   // fold (xor c1, c2) -> c1^c2
5064   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5065   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
5066   if (N0C && N1C)
5067     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
5068   // canonicalize constant to RHS
5069   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5070      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5071     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
5072   // fold (xor x, 0) -> x
5073   if (isNullConstant(N1))
5074     return N0;
5075 
5076   if (SDValue NewSel = foldBinOpIntoSelect(N))
5077     return NewSel;
5078 
5079   // reassociate xor
5080   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
5081     return RXOR;
5082 
5083   // fold !(x cc y) -> (x !cc y)
5084   SDValue LHS, RHS, CC;
5085   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
5086     bool isInt = LHS.getValueType().isInteger();
5087     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
5088                                                isInt);
5089 
5090     if (!LegalOperations ||
5091         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
5092       switch (N0.getOpcode()) {
5093       default:
5094         llvm_unreachable("Unhandled SetCC Equivalent!");
5095       case ISD::SETCC:
5096         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
5097       case ISD::SELECT_CC:
5098         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
5099                                N0.getOperand(3), NotCC);
5100       }
5101     }
5102   }
5103 
5104   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
5105   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
5106       N0.getNode()->hasOneUse() &&
5107       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
5108     SDValue V = N0.getOperand(0);
5109     SDLoc DL(N0);
5110     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
5111                     DAG.getConstant(1, DL, V.getValueType()));
5112     AddToWorklist(V.getNode());
5113     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
5114   }
5115 
5116   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
5117   if (isOneConstant(N1) && VT == MVT::i1 &&
5118       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
5119     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5120     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
5121       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
5122       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
5123       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
5124       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
5125       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
5126     }
5127   }
5128   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
5129   if (isAllOnesConstant(N1) &&
5130       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
5131     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5132     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
5133       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
5134       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
5135       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
5136       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
5137       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
5138     }
5139   }
5140   // fold (xor (and x, y), y) -> (and (not x), y)
5141   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5142       N0->getOperand(1) == N1) {
5143     SDValue X = N0->getOperand(0);
5144     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
5145     AddToWorklist(NotX.getNode());
5146     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
5147   }
5148   // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
5149   if (N1C && N0.getOpcode() == ISD::XOR) {
5150     if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
5151       SDLoc DL(N);
5152       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
5153                          DAG.getConstant(N1C->getAPIntValue() ^
5154                                          N00C->getAPIntValue(), DL, VT));
5155     }
5156     if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
5157       SDLoc DL(N);
5158       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
5159                          DAG.getConstant(N1C->getAPIntValue() ^
5160                                          N01C->getAPIntValue(), DL, VT));
5161     }
5162   }
5163 
5164   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
5165   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5166   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
5167       N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
5168       TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
5169     if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
5170       if (C->getAPIntValue() == (OpSizeInBits - 1))
5171         return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
5172   }
5173 
5174   // fold (xor x, x) -> 0
5175   if (N0 == N1)
5176     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
5177 
5178   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
5179   // Here is a concrete example of this equivalence:
5180   // i16   x ==  14
5181   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
5182   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
5183   //
5184   // =>
5185   //
5186   // i16     ~1      == 0b1111111111111110
5187   // i16 rol(~1, 14) == 0b1011111111111111
5188   //
5189   // Some additional tips to help conceptualize this transform:
5190   // - Try to see the operation as placing a single zero in a value of all ones.
5191   // - There exists no value for x which would allow the result to contain zero.
5192   // - Values of x larger than the bitwidth are undefined and do not require a
5193   //   consistent result.
5194   // - Pushing the zero left requires shifting one bits in from the right.
5195   // A rotate left of ~1 is a nice way of achieving the desired result.
5196   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
5197       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
5198     SDLoc DL(N);
5199     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
5200                        N0.getOperand(1));
5201   }
5202 
5203   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
5204   if (N0.getOpcode() == N1.getOpcode())
5205     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
5206       return Tmp;
5207 
5208   // Simplify the expression using non-local knowledge.
5209   if (SimplifyDemandedBits(SDValue(N, 0)))
5210     return SDValue(N, 0);
5211 
5212   return SDValue();
5213 }
5214 
5215 /// Handle transforms common to the three shifts, when the shift amount is a
5216 /// constant.
5217 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
5218   SDNode *LHS = N->getOperand(0).getNode();
5219   if (!LHS->hasOneUse()) return SDValue();
5220 
5221   // We want to pull some binops through shifts, so that we have (and (shift))
5222   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
5223   // thing happens with address calculations, so it's important to canonicalize
5224   // it.
5225   bool HighBitSet = false;  // Can we transform this if the high bit is set?
5226 
5227   switch (LHS->getOpcode()) {
5228   default: return SDValue();
5229   case ISD::OR:
5230   case ISD::XOR:
5231     HighBitSet = false; // We can only transform sra if the high bit is clear.
5232     break;
5233   case ISD::AND:
5234     HighBitSet = true;  // We can only transform sra if the high bit is set.
5235     break;
5236   case ISD::ADD:
5237     if (N->getOpcode() != ISD::SHL)
5238       return SDValue(); // only shl(add) not sr[al](add).
5239     HighBitSet = false; // We can only transform sra if the high bit is clear.
5240     break;
5241   }
5242 
5243   // We require the RHS of the binop to be a constant and not opaque as well.
5244   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
5245   if (!BinOpCst) return SDValue();
5246 
5247   // FIXME: disable this unless the input to the binop is a shift by a constant
5248   // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
5249   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
5250   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
5251                  BinOpLHSVal->getOpcode() == ISD::SRA ||
5252                  BinOpLHSVal->getOpcode() == ISD::SRL;
5253   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
5254                         BinOpLHSVal->getOpcode() == ISD::SELECT;
5255 
5256   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
5257       !isCopyOrSelect)
5258     return SDValue();
5259 
5260   if (isCopyOrSelect && N->hasOneUse())
5261     return SDValue();
5262 
5263   EVT VT = N->getValueType(0);
5264 
5265   // If this is a signed shift right, and the high bit is modified by the
5266   // logical operation, do not perform the transformation. The highBitSet
5267   // boolean indicates the value of the high bit of the constant which would
5268   // cause it to be modified for this operation.
5269   if (N->getOpcode() == ISD::SRA) {
5270     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
5271     if (BinOpRHSSignSet != HighBitSet)
5272       return SDValue();
5273   }
5274 
5275   if (!TLI.isDesirableToCommuteWithShift(LHS))
5276     return SDValue();
5277 
5278   // Fold the constants, shifting the binop RHS by the shift amount.
5279   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
5280                                N->getValueType(0),
5281                                LHS->getOperand(1), N->getOperand(1));
5282   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
5283 
5284   // Create the new shift.
5285   SDValue NewShift = DAG.getNode(N->getOpcode(),
5286                                  SDLoc(LHS->getOperand(0)),
5287                                  VT, LHS->getOperand(0), N->getOperand(1));
5288 
5289   // Create the new binop.
5290   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
5291 }
5292 
5293 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5294   assert(N->getOpcode() == ISD::TRUNCATE);
5295   assert(N->getOperand(0).getOpcode() == ISD::AND);
5296 
5297   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5298   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5299     SDValue N01 = N->getOperand(0).getOperand(1);
5300     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5301       SDLoc DL(N);
5302       EVT TruncVT = N->getValueType(0);
5303       SDValue N00 = N->getOperand(0).getOperand(0);
5304       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5305       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5306       AddToWorklist(Trunc00.getNode());
5307       AddToWorklist(Trunc01.getNode());
5308       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5309     }
5310   }
5311 
5312   return SDValue();
5313 }
5314 
5315 SDValue DAGCombiner::visitRotate(SDNode *N) {
5316   SDLoc dl(N);
5317   SDValue N0 = N->getOperand(0);
5318   SDValue N1 = N->getOperand(1);
5319   EVT VT = N->getValueType(0);
5320   unsigned Bitsize = VT.getScalarSizeInBits();
5321 
5322   // fold (rot x, 0) -> x
5323   if (isNullConstantOrNullSplatConstant(N1))
5324     return N0;
5325 
5326   // fold (rot x, c) -> (rot x, c % BitSize)
5327   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
5328     if (Cst->getAPIntValue().uge(Bitsize)) {
5329       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
5330       return DAG.getNode(N->getOpcode(), dl, VT, N0,
5331                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
5332     }
5333   }
5334 
5335   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
5336   if (N1.getOpcode() == ISD::TRUNCATE &&
5337       N1.getOperand(0).getOpcode() == ISD::AND) {
5338     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5339       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
5340   }
5341 
5342   unsigned NextOp = N0.getOpcode();
5343   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
5344   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
5345     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
5346     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
5347     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
5348       EVT ShiftVT = C1->getValueType(0);
5349       bool SameSide = (N->getOpcode() == NextOp);
5350       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
5351       if (SDValue CombinedShift =
5352               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
5353         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
5354         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
5355             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
5356             BitsizeC.getNode());
5357         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
5358                            CombinedShiftNorm);
5359       }
5360     }
5361   }
5362   return SDValue();
5363 }
5364 
5365 SDValue DAGCombiner::visitSHL(SDNode *N) {
5366   SDValue N0 = N->getOperand(0);
5367   SDValue N1 = N->getOperand(1);
5368   EVT VT = N0.getValueType();
5369   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5370 
5371   // fold vector ops
5372   if (VT.isVector()) {
5373     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5374       return FoldedVOp;
5375 
5376     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
5377     // If setcc produces all-one true value then:
5378     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
5379     if (N1CV && N1CV->isConstant()) {
5380       if (N0.getOpcode() == ISD::AND) {
5381         SDValue N00 = N0->getOperand(0);
5382         SDValue N01 = N0->getOperand(1);
5383         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
5384 
5385         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
5386             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
5387                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
5388           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
5389                                                      N01CV, N1CV))
5390             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
5391         }
5392       }
5393     }
5394   }
5395 
5396   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5397 
5398   // fold (shl c1, c2) -> c1<<c2
5399   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5400   if (N0C && N1C && !N1C->isOpaque())
5401     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
5402   // fold (shl 0, x) -> 0
5403   if (isNullConstantOrNullSplatConstant(N0))
5404     return N0;
5405   // fold (shl x, c >= size(x)) -> undef
5406   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5407   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
5408     return Val->getAPIntValue().uge(OpSizeInBits);
5409   };
5410   if (matchUnaryPredicate(N1, MatchShiftTooBig))
5411     return DAG.getUNDEF(VT);
5412   // fold (shl x, 0) -> x
5413   if (N1C && N1C->isNullValue())
5414     return N0;
5415   // fold (shl undef, x) -> 0
5416   if (N0.isUndef())
5417     return DAG.getConstant(0, SDLoc(N), VT);
5418 
5419   if (SDValue NewSel = foldBinOpIntoSelect(N))
5420     return NewSel;
5421 
5422   // if (shl x, c) is known to be zero, return 0
5423   if (DAG.MaskedValueIsZero(SDValue(N, 0),
5424                             APInt::getAllOnesValue(OpSizeInBits)))
5425     return DAG.getConstant(0, SDLoc(N), VT);
5426   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
5427   if (N1.getOpcode() == ISD::TRUNCATE &&
5428       N1.getOperand(0).getOpcode() == ISD::AND) {
5429     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5430       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
5431   }
5432 
5433   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5434     return SDValue(N, 0);
5435 
5436   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
5437   if (N0.getOpcode() == ISD::SHL) {
5438     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5439                                           ConstantSDNode *RHS) {
5440       APInt c1 = LHS->getAPIntValue();
5441       APInt c2 = RHS->getAPIntValue();
5442       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5443       return (c1 + c2).uge(OpSizeInBits);
5444     };
5445     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5446       return DAG.getConstant(0, SDLoc(N), VT);
5447 
5448     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5449                                        ConstantSDNode *RHS) {
5450       APInt c1 = LHS->getAPIntValue();
5451       APInt c2 = RHS->getAPIntValue();
5452       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5453       return (c1 + c2).ult(OpSizeInBits);
5454     };
5455     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5456       SDLoc DL(N);
5457       EVT ShiftVT = N1.getValueType();
5458       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5459       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
5460     }
5461   }
5462 
5463   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
5464   // For this to be valid, the second form must not preserve any of the bits
5465   // that are shifted out by the inner shift in the first form.  This means
5466   // the outer shift size must be >= the number of bits added by the ext.
5467   // As a corollary, we don't care what kind of ext it is.
5468   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
5469               N0.getOpcode() == ISD::ANY_EXTEND ||
5470               N0.getOpcode() == ISD::SIGN_EXTEND) &&
5471       N0.getOperand(0).getOpcode() == ISD::SHL) {
5472     SDValue N0Op0 = N0.getOperand(0);
5473     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5474       APInt c1 = N0Op0C1->getAPIntValue();
5475       APInt c2 = N1C->getAPIntValue();
5476       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5477 
5478       EVT InnerShiftVT = N0Op0.getValueType();
5479       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5480       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
5481         SDLoc DL(N0);
5482         APInt Sum = c1 + c2;
5483         if (Sum.uge(OpSizeInBits))
5484           return DAG.getConstant(0, DL, VT);
5485 
5486         return DAG.getNode(
5487             ISD::SHL, DL, VT,
5488             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
5489             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5490       }
5491     }
5492   }
5493 
5494   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
5495   // Only fold this if the inner zext has no other uses to avoid increasing
5496   // the total number of instructions.
5497   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
5498       N0.getOperand(0).getOpcode() == ISD::SRL) {
5499     SDValue N0Op0 = N0.getOperand(0);
5500     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5501       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
5502         uint64_t c1 = N0Op0C1->getZExtValue();
5503         uint64_t c2 = N1C->getZExtValue();
5504         if (c1 == c2) {
5505           SDValue NewOp0 = N0.getOperand(0);
5506           EVT CountVT = NewOp0.getOperand(1).getValueType();
5507           SDLoc DL(N);
5508           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
5509                                        NewOp0,
5510                                        DAG.getConstant(c2, DL, CountVT));
5511           AddToWorklist(NewSHL.getNode());
5512           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
5513         }
5514       }
5515     }
5516   }
5517 
5518   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
5519   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
5520   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
5521       N0->getFlags().hasExact()) {
5522     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5523       uint64_t C1 = N0C1->getZExtValue();
5524       uint64_t C2 = N1C->getZExtValue();
5525       SDLoc DL(N);
5526       if (C1 <= C2)
5527         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5528                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
5529       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
5530                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
5531     }
5532   }
5533 
5534   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
5535   //                               (and (srl x, (sub c1, c2), MASK)
5536   // Only fold this if the inner shift has no other uses -- if it does, folding
5537   // this will increase the total number of instructions.
5538   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5539     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5540       uint64_t c1 = N0C1->getZExtValue();
5541       if (c1 < OpSizeInBits) {
5542         uint64_t c2 = N1C->getZExtValue();
5543         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
5544         SDValue Shift;
5545         if (c2 > c1) {
5546           Mask <<= c2 - c1;
5547           SDLoc DL(N);
5548           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5549                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
5550         } else {
5551           Mask.lshrInPlace(c1 - c2);
5552           SDLoc DL(N);
5553           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
5554                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
5555         }
5556         SDLoc DL(N0);
5557         return DAG.getNode(ISD::AND, DL, VT, Shift,
5558                            DAG.getConstant(Mask, DL, VT));
5559       }
5560     }
5561   }
5562 
5563   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
5564   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
5565       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
5566     SDLoc DL(N);
5567     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
5568     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
5569     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
5570   }
5571 
5572   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
5573   // Variant of version done on multiply, except mul by a power of 2 is turned
5574   // into a shift.
5575   if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
5576       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5577       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5578     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
5579     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5580     AddToWorklist(Shl0.getNode());
5581     AddToWorklist(Shl1.getNode());
5582     return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
5583   }
5584 
5585   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
5586   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
5587       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5588       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5589     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5590     if (isConstantOrConstantVector(Shl))
5591       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
5592   }
5593 
5594   if (N1C && !N1C->isOpaque())
5595     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
5596       return NewSHL;
5597 
5598   return SDValue();
5599 }
5600 
5601 SDValue DAGCombiner::visitSRA(SDNode *N) {
5602   SDValue N0 = N->getOperand(0);
5603   SDValue N1 = N->getOperand(1);
5604   EVT VT = N0.getValueType();
5605   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5606 
5607   // Arithmetic shifting an all-sign-bit value is a no-op.
5608   // fold (sra 0, x) -> 0
5609   // fold (sra -1, x) -> -1
5610   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
5611     return N0;
5612 
5613   // fold vector ops
5614   if (VT.isVector())
5615     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5616       return FoldedVOp;
5617 
5618   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5619 
5620   // fold (sra c1, c2) -> (sra c1, c2)
5621   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5622   if (N0C && N1C && !N1C->isOpaque())
5623     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
5624   // fold (sra x, c >= size(x)) -> undef
5625   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5626   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
5627     return Val->getAPIntValue().uge(OpSizeInBits);
5628   };
5629   if (matchUnaryPredicate(N1, MatchShiftTooBig))
5630     return DAG.getUNDEF(VT);
5631   // fold (sra x, 0) -> x
5632   if (N1C && N1C->isNullValue())
5633     return N0;
5634 
5635   if (SDValue NewSel = foldBinOpIntoSelect(N))
5636     return NewSel;
5637 
5638   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
5639   // sext_inreg.
5640   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
5641     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
5642     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
5643     if (VT.isVector())
5644       ExtVT = EVT::getVectorVT(*DAG.getContext(),
5645                                ExtVT, VT.getVectorNumElements());
5646     if ((!LegalOperations ||
5647          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
5648       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
5649                          N0.getOperand(0), DAG.getValueType(ExtVT));
5650   }
5651 
5652   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
5653   if (N0.getOpcode() == ISD::SRA) {
5654     SDLoc DL(N);
5655     EVT ShiftVT = N1.getValueType();
5656 
5657     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5658                                           ConstantSDNode *RHS) {
5659       APInt c1 = LHS->getAPIntValue();
5660       APInt c2 = RHS->getAPIntValue();
5661       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5662       return (c1 + c2).uge(OpSizeInBits);
5663     };
5664     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5665       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
5666                          DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
5667 
5668     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5669                                        ConstantSDNode *RHS) {
5670       APInt c1 = LHS->getAPIntValue();
5671       APInt c2 = RHS->getAPIntValue();
5672       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5673       return (c1 + c2).ult(OpSizeInBits);
5674     };
5675     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5676       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5677       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
5678     }
5679   }
5680 
5681   // fold (sra (shl X, m), (sub result_size, n))
5682   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
5683   // result_size - n != m.
5684   // If truncate is free for the target sext(shl) is likely to result in better
5685   // code.
5686   if (N0.getOpcode() == ISD::SHL && N1C) {
5687     // Get the two constanst of the shifts, CN0 = m, CN = n.
5688     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
5689     if (N01C) {
5690       LLVMContext &Ctx = *DAG.getContext();
5691       // Determine what the truncate's result bitsize and type would be.
5692       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
5693 
5694       if (VT.isVector())
5695         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
5696 
5697       // Determine the residual right-shift amount.
5698       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
5699 
5700       // If the shift is not a no-op (in which case this should be just a sign
5701       // extend already), the truncated to type is legal, sign_extend is legal
5702       // on that type, and the truncate to that type is both legal and free,
5703       // perform the transform.
5704       if ((ShiftAmt > 0) &&
5705           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
5706           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
5707           TLI.isTruncateFree(VT, TruncVT)) {
5708 
5709         SDLoc DL(N);
5710         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
5711             getShiftAmountTy(N0.getOperand(0).getValueType()));
5712         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
5713                                     N0.getOperand(0), Amt);
5714         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
5715                                     Shift);
5716         return DAG.getNode(ISD::SIGN_EXTEND, DL,
5717                            N->getValueType(0), Trunc);
5718       }
5719     }
5720   }
5721 
5722   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
5723   if (N1.getOpcode() == ISD::TRUNCATE &&
5724       N1.getOperand(0).getOpcode() == ISD::AND) {
5725     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5726       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
5727   }
5728 
5729   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
5730   //      if c1 is equal to the number of bits the trunc removes
5731   if (N0.getOpcode() == ISD::TRUNCATE &&
5732       (N0.getOperand(0).getOpcode() == ISD::SRL ||
5733        N0.getOperand(0).getOpcode() == ISD::SRA) &&
5734       N0.getOperand(0).hasOneUse() &&
5735       N0.getOperand(0).getOperand(1).hasOneUse() &&
5736       N1C) {
5737     SDValue N0Op0 = N0.getOperand(0);
5738     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
5739       unsigned LargeShiftVal = LargeShift->getZExtValue();
5740       EVT LargeVT = N0Op0.getValueType();
5741 
5742       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
5743         SDLoc DL(N);
5744         SDValue Amt =
5745           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
5746                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
5747         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
5748                                   N0Op0.getOperand(0), Amt);
5749         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
5750       }
5751     }
5752   }
5753 
5754   // Simplify, based on bits shifted out of the LHS.
5755   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5756     return SDValue(N, 0);
5757 
5758 
5759   // If the sign bit is known to be zero, switch this to a SRL.
5760   if (DAG.SignBitIsZero(N0))
5761     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
5762 
5763   if (N1C && !N1C->isOpaque())
5764     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
5765       return NewSRA;
5766 
5767   return SDValue();
5768 }
5769 
5770 SDValue DAGCombiner::visitSRL(SDNode *N) {
5771   SDValue N0 = N->getOperand(0);
5772   SDValue N1 = N->getOperand(1);
5773   EVT VT = N0.getValueType();
5774   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5775 
5776   // fold vector ops
5777   if (VT.isVector())
5778     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5779       return FoldedVOp;
5780 
5781   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5782 
5783   // fold (srl c1, c2) -> c1 >>u c2
5784   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5785   if (N0C && N1C && !N1C->isOpaque())
5786     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
5787   // fold (srl 0, x) -> 0
5788   if (isNullConstantOrNullSplatConstant(N0))
5789     return N0;
5790   // fold (srl x, c >= size(x)) -> undef
5791   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5792   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
5793     return Val->getAPIntValue().uge(OpSizeInBits);
5794   };
5795   if (matchUnaryPredicate(N1, MatchShiftTooBig))
5796     return DAG.getUNDEF(VT);
5797   // fold (srl x, 0) -> x
5798   if (N1C && N1C->isNullValue())
5799     return N0;
5800 
5801   if (SDValue NewSel = foldBinOpIntoSelect(N))
5802     return NewSel;
5803 
5804   // if (srl x, c) is known to be zero, return 0
5805   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5806                                    APInt::getAllOnesValue(OpSizeInBits)))
5807     return DAG.getConstant(0, SDLoc(N), VT);
5808 
5809   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
5810   if (N0.getOpcode() == ISD::SRL) {
5811     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5812                                           ConstantSDNode *RHS) {
5813       APInt c1 = LHS->getAPIntValue();
5814       APInt c2 = RHS->getAPIntValue();
5815       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5816       return (c1 + c2).uge(OpSizeInBits);
5817     };
5818     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5819       return DAG.getConstant(0, SDLoc(N), VT);
5820 
5821     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5822                                        ConstantSDNode *RHS) {
5823       APInt c1 = LHS->getAPIntValue();
5824       APInt c2 = RHS->getAPIntValue();
5825       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5826       return (c1 + c2).ult(OpSizeInBits);
5827     };
5828     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5829       SDLoc DL(N);
5830       EVT ShiftVT = N1.getValueType();
5831       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5832       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
5833     }
5834   }
5835 
5836   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
5837   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
5838       N0.getOperand(0).getOpcode() == ISD::SRL) {
5839     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
5840       uint64_t c1 = N001C->getZExtValue();
5841       uint64_t c2 = N1C->getZExtValue();
5842       EVT InnerShiftVT = N0.getOperand(0).getValueType();
5843       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
5844       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5845       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
5846       if (c1 + OpSizeInBits == InnerShiftSize) {
5847         SDLoc DL(N0);
5848         if (c1 + c2 >= InnerShiftSize)
5849           return DAG.getConstant(0, DL, VT);
5850         return DAG.getNode(ISD::TRUNCATE, DL, VT,
5851                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
5852                                        N0.getOperand(0).getOperand(0),
5853                                        DAG.getConstant(c1 + c2, DL,
5854                                                        ShiftCountVT)));
5855       }
5856     }
5857   }
5858 
5859   // fold (srl (shl x, c), c) -> (and x, cst2)
5860   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
5861       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
5862     SDLoc DL(N);
5863     SDValue Mask =
5864         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
5865     AddToWorklist(Mask.getNode());
5866     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
5867   }
5868 
5869   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
5870   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5871     // Shifting in all undef bits?
5872     EVT SmallVT = N0.getOperand(0).getValueType();
5873     unsigned BitSize = SmallVT.getScalarSizeInBits();
5874     if (N1C->getZExtValue() >= BitSize)
5875       return DAG.getUNDEF(VT);
5876 
5877     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
5878       uint64_t ShiftAmt = N1C->getZExtValue();
5879       SDLoc DL0(N0);
5880       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
5881                                        N0.getOperand(0),
5882                           DAG.getConstant(ShiftAmt, DL0,
5883                                           getShiftAmountTy(SmallVT)));
5884       AddToWorklist(SmallShift.getNode());
5885       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
5886       SDLoc DL(N);
5887       return DAG.getNode(ISD::AND, DL, VT,
5888                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
5889                          DAG.getConstant(Mask, DL, VT));
5890     }
5891   }
5892 
5893   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
5894   // bit, which is unmodified by sra.
5895   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
5896     if (N0.getOpcode() == ISD::SRA)
5897       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
5898   }
5899 
5900   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
5901   if (N1C && N0.getOpcode() == ISD::CTLZ &&
5902       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
5903     KnownBits Known;
5904     DAG.computeKnownBits(N0.getOperand(0), Known);
5905 
5906     // If any of the input bits are KnownOne, then the input couldn't be all
5907     // zeros, thus the result of the srl will always be zero.
5908     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
5909 
5910     // If all of the bits input the to ctlz node are known to be zero, then
5911     // the result of the ctlz is "32" and the result of the shift is one.
5912     APInt UnknownBits = ~Known.Zero;
5913     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
5914 
5915     // Otherwise, check to see if there is exactly one bit input to the ctlz.
5916     if (UnknownBits.isPowerOf2()) {
5917       // Okay, we know that only that the single bit specified by UnknownBits
5918       // could be set on input to the CTLZ node. If this bit is set, the SRL
5919       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
5920       // to an SRL/XOR pair, which is likely to simplify more.
5921       unsigned ShAmt = UnknownBits.countTrailingZeros();
5922       SDValue Op = N0.getOperand(0);
5923 
5924       if (ShAmt) {
5925         SDLoc DL(N0);
5926         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5927                   DAG.getConstant(ShAmt, DL,
5928                                   getShiftAmountTy(Op.getValueType())));
5929         AddToWorklist(Op.getNode());
5930       }
5931 
5932       SDLoc DL(N);
5933       return DAG.getNode(ISD::XOR, DL, VT,
5934                          Op, DAG.getConstant(1, DL, VT));
5935     }
5936   }
5937 
5938   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
5939   if (N1.getOpcode() == ISD::TRUNCATE &&
5940       N1.getOperand(0).getOpcode() == ISD::AND) {
5941     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5942       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
5943   }
5944 
5945   // fold operands of srl based on knowledge that the low bits are not
5946   // demanded.
5947   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5948     return SDValue(N, 0);
5949 
5950   if (N1C && !N1C->isOpaque())
5951     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
5952       return NewSRL;
5953 
5954   // Attempt to convert a srl of a load into a narrower zero-extending load.
5955   if (SDValue NarrowLoad = ReduceLoadWidth(N))
5956     return NarrowLoad;
5957 
5958   // Here is a common situation. We want to optimize:
5959   //
5960   //   %a = ...
5961   //   %b = and i32 %a, 2
5962   //   %c = srl i32 %b, 1
5963   //   brcond i32 %c ...
5964   //
5965   // into
5966   //
5967   //   %a = ...
5968   //   %b = and %a, 2
5969   //   %c = setcc eq %b, 0
5970   //   brcond %c ...
5971   //
5972   // However when after the source operand of SRL is optimized into AND, the SRL
5973   // itself may not be optimized further. Look for it and add the BRCOND into
5974   // the worklist.
5975   if (N->hasOneUse()) {
5976     SDNode *Use = *N->use_begin();
5977     if (Use->getOpcode() == ISD::BRCOND)
5978       AddToWorklist(Use);
5979     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
5980       // Also look pass the truncate.
5981       Use = *Use->use_begin();
5982       if (Use->getOpcode() == ISD::BRCOND)
5983         AddToWorklist(Use);
5984     }
5985   }
5986 
5987   return SDValue();
5988 }
5989 
5990 SDValue DAGCombiner::visitABS(SDNode *N) {
5991   SDValue N0 = N->getOperand(0);
5992   EVT VT = N->getValueType(0);
5993 
5994   // fold (abs c1) -> c2
5995   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5996     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
5997   // fold (abs (abs x)) -> (abs x)
5998   if (N0.getOpcode() == ISD::ABS)
5999     return N0;
6000   // fold (abs x) -> x iff not-negative
6001   if (DAG.SignBitIsZero(N0))
6002     return N0;
6003   return SDValue();
6004 }
6005 
6006 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
6007   SDValue N0 = N->getOperand(0);
6008   EVT VT = N->getValueType(0);
6009 
6010   // fold (bswap c1) -> c2
6011   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6012     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
6013   // fold (bswap (bswap x)) -> x
6014   if (N0.getOpcode() == ISD::BSWAP)
6015     return N0->getOperand(0);
6016   return SDValue();
6017 }
6018 
6019 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
6020   SDValue N0 = N->getOperand(0);
6021   EVT VT = N->getValueType(0);
6022 
6023   // fold (bitreverse c1) -> c2
6024   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6025     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
6026   // fold (bitreverse (bitreverse x)) -> x
6027   if (N0.getOpcode() == ISD::BITREVERSE)
6028     return N0.getOperand(0);
6029   return SDValue();
6030 }
6031 
6032 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
6033   SDValue N0 = N->getOperand(0);
6034   EVT VT = N->getValueType(0);
6035 
6036   // fold (ctlz c1) -> c2
6037   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6038     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
6039   return SDValue();
6040 }
6041 
6042 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
6043   SDValue N0 = N->getOperand(0);
6044   EVT VT = N->getValueType(0);
6045 
6046   // fold (ctlz_zero_undef c1) -> c2
6047   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6048     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6049   return SDValue();
6050 }
6051 
6052 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
6053   SDValue N0 = N->getOperand(0);
6054   EVT VT = N->getValueType(0);
6055 
6056   // fold (cttz c1) -> c2
6057   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6058     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
6059   return SDValue();
6060 }
6061 
6062 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
6063   SDValue N0 = N->getOperand(0);
6064   EVT VT = N->getValueType(0);
6065 
6066   // fold (cttz_zero_undef c1) -> c2
6067   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6068     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6069   return SDValue();
6070 }
6071 
6072 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
6073   SDValue N0 = N->getOperand(0);
6074   EVT VT = N->getValueType(0);
6075 
6076   // fold (ctpop c1) -> c2
6077   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6078     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
6079   return SDValue();
6080 }
6081 
6082 
6083 /// \brief Generate Min/Max node
6084 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
6085                                    SDValue RHS, SDValue True, SDValue False,
6086                                    ISD::CondCode CC, const TargetLowering &TLI,
6087                                    SelectionDAG &DAG) {
6088   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
6089     return SDValue();
6090 
6091   switch (CC) {
6092   case ISD::SETOLT:
6093   case ISD::SETOLE:
6094   case ISD::SETLT:
6095   case ISD::SETLE:
6096   case ISD::SETULT:
6097   case ISD::SETULE: {
6098     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
6099     if (TLI.isOperationLegal(Opcode, VT))
6100       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6101     return SDValue();
6102   }
6103   case ISD::SETOGT:
6104   case ISD::SETOGE:
6105   case ISD::SETGT:
6106   case ISD::SETGE:
6107   case ISD::SETUGT:
6108   case ISD::SETUGE: {
6109     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
6110     if (TLI.isOperationLegal(Opcode, VT))
6111       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6112     return SDValue();
6113   }
6114   default:
6115     return SDValue();
6116   }
6117 }
6118 
6119 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
6120   SDValue Cond = N->getOperand(0);
6121   SDValue N1 = N->getOperand(1);
6122   SDValue N2 = N->getOperand(2);
6123   EVT VT = N->getValueType(0);
6124   EVT CondVT = Cond.getValueType();
6125   SDLoc DL(N);
6126 
6127   if (!VT.isInteger())
6128     return SDValue();
6129 
6130   auto *C1 = dyn_cast<ConstantSDNode>(N1);
6131   auto *C2 = dyn_cast<ConstantSDNode>(N2);
6132   if (!C1 || !C2)
6133     return SDValue();
6134 
6135   // Only do this before legalization to avoid conflicting with target-specific
6136   // transforms in the other direction (create a select from a zext/sext). There
6137   // is also a target-independent combine here in DAGCombiner in the other
6138   // direction for (select Cond, -1, 0) when the condition is not i1.
6139   if (CondVT == MVT::i1 && !LegalOperations) {
6140     if (C1->isNullValue() && C2->isOne()) {
6141       // select Cond, 0, 1 --> zext (!Cond)
6142       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
6143       if (VT != MVT::i1)
6144         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
6145       return NotCond;
6146     }
6147     if (C1->isNullValue() && C2->isAllOnesValue()) {
6148       // select Cond, 0, -1 --> sext (!Cond)
6149       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
6150       if (VT != MVT::i1)
6151         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
6152       return NotCond;
6153     }
6154     if (C1->isOne() && C2->isNullValue()) {
6155       // select Cond, 1, 0 --> zext (Cond)
6156       if (VT != MVT::i1)
6157         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
6158       return Cond;
6159     }
6160     if (C1->isAllOnesValue() && C2->isNullValue()) {
6161       // select Cond, -1, 0 --> sext (Cond)
6162       if (VT != MVT::i1)
6163         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
6164       return Cond;
6165     }
6166 
6167     // For any constants that differ by 1, we can transform the select into an
6168     // extend and add. Use a target hook because some targets may prefer to
6169     // transform in the other direction.
6170     if (TLI.convertSelectOfConstantsToMath()) {
6171       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
6172         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6173         if (VT != MVT::i1)
6174           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
6175         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
6176       }
6177       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
6178         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6179         if (VT != MVT::i1)
6180           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
6181         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
6182       }
6183     }
6184 
6185     return SDValue();
6186   }
6187 
6188   // fold (select Cond, 0, 1) -> (xor Cond, 1)
6189   // We can't do this reliably if integer based booleans have different contents
6190   // to floating point based booleans. This is because we can't tell whether we
6191   // have an integer-based boolean or a floating-point-based boolean unless we
6192   // can find the SETCC that produced it and inspect its operands. This is
6193   // fairly easy if C is the SETCC node, but it can potentially be
6194   // undiscoverable (or not reasonably discoverable). For example, it could be
6195   // in another basic block or it could require searching a complicated
6196   // expression.
6197   if (CondVT.isInteger() &&
6198       TLI.getBooleanContents(false, true) ==
6199           TargetLowering::ZeroOrOneBooleanContent &&
6200       TLI.getBooleanContents(false, false) ==
6201           TargetLowering::ZeroOrOneBooleanContent &&
6202       C1->isNullValue() && C2->isOne()) {
6203     SDValue NotCond =
6204         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
6205     if (VT.bitsEq(CondVT))
6206       return NotCond;
6207     return DAG.getZExtOrTrunc(NotCond, DL, VT);
6208   }
6209 
6210   return SDValue();
6211 }
6212 
/// Combine a scalar ISD::SELECT node. Operand 0 is the condition, operand 1
/// the value chosen when it is true and operand 2 the value chosen when it is
/// false. The folds below are tried in order and the first that applies wins.
/// Returns the replacement value, SDValue(N, 0) when SimplifySelectOps
/// reports that it handled the node, or an empty SDValue when nothing applied.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;

  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ? N1 : N2;
  }

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  // Selects between two constants are handled by a dedicated helper.
  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However, we always transform
    // to the right-hand form if we find that the inner select already exists
    // in the DAG, and we always transform to the left-hand form if we know
    // that we can further optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      // The inner select is built up front; a non-empty use list on the
      // result is taken to mean an equivalent select was already in the DAG.
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      // Same speculative construction as in the AND case above.
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  // select (xor Cond, 1), X, Y -> select Cond, Y, X
  if (VT0 == MVT::i1) {
    if (N0->getOpcode() == ISD::XOR) {
      if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
        SDValue Cond0 = N0->getOperand(0);
        if (C->isOne())
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
      }
    }
  }

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select (fcmp lt x, y), x, y -> fminnum x, y
    // select (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      if (SDValue FMinMax = combineMinNumMaxNum(
              DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    // Merge the setcc into the select as a SELECT_CC when that node is usable:
    // legal-or-custom is enough before operation legalization, strictly legal
    // is required afterwards.
    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
                         N0.getOperand(1), N1, N2, N0.getOperand(2));
    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
6371 
6372 static
6373 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
6374   SDLoc DL(N);
6375   EVT LoVT, HiVT;
6376   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
6377 
6378   // Split the inputs.
6379   SDValue Lo, Hi, LL, LH, RL, RH;
6380   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
6381   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
6382 
6383   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
6384   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
6385 
6386   return std::make_pair(Lo, Hi);
6387 }
6388 
6389 // This function assumes all the vselect's arguments are CONCAT_VECTOR
6390 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
6391 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
6392   SDLoc DL(N);
6393   SDValue Cond = N->getOperand(0);
6394   SDValue LHS = N->getOperand(1);
6395   SDValue RHS = N->getOperand(2);
6396   EVT VT = N->getValueType(0);
6397   int NumElems = VT.getVectorNumElements();
6398   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
6399          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
6400          Cond.getOpcode() == ISD::BUILD_VECTOR);
6401 
6402   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
6403   // binary ones here.
6404   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
6405     return SDValue();
6406 
6407   // We're sure we have an even number of elements due to the
6408   // concat_vectors we have as arguments to vselect.
6409   // Skip BV elements until we find one that's not an UNDEF
6410   // After we find an UNDEF element, keep looping until we get to half the
6411   // length of the BV and see if all the non-undef nodes are the same.
6412   ConstantSDNode *BottomHalf = nullptr;
6413   for (int i = 0; i < NumElems / 2; ++i) {
6414     if (Cond->getOperand(i)->isUndef())
6415       continue;
6416 
6417     if (BottomHalf == nullptr)
6418       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6419     else if (Cond->getOperand(i).getNode() != BottomHalf)
6420       return SDValue();
6421   }
6422 
6423   // Do the same for the second half of the BuildVector
6424   ConstantSDNode *TopHalf = nullptr;
6425   for (int i = NumElems / 2; i < NumElems; ++i) {
6426     if (Cond->getOperand(i)->isUndef())
6427       continue;
6428 
6429     if (TopHalf == nullptr)
6430       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6431     else if (Cond->getOperand(i).getNode() != TopHalf)
6432       return SDValue();
6433   }
6434 
6435   assert(TopHalf && BottomHalf &&
6436          "One half of the selector was all UNDEFs and the other was all the "
6437          "same value. This should have been addressed before this function.");
6438   return DAG.getNode(
6439       ISD::CONCAT_VECTORS, DL, VT,
6440       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
6441       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
6442 }
6443 
6444 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
6445 
6446   if (Level >= AfterLegalizeTypes)
6447     return SDValue();
6448 
6449   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
6450   SDValue Mask = MSC->getMask();
6451   SDValue Data  = MSC->getValue();
6452   SDLoc DL(N);
6453 
6454   // If the MSCATTER data type requires splitting and the mask is provided by a
6455   // SETCC, then split both nodes and its operands before legalization. This
6456   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6457   // and enables future optimizations (e.g. min/max pattern matching on X86).
6458   if (Mask.getOpcode() != ISD::SETCC)
6459     return SDValue();
6460 
6461   // Check if any splitting is required.
6462   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
6463       TargetLowering::TypeSplitVector)
6464     return SDValue();
6465   SDValue MaskLo, MaskHi, Lo, Hi;
6466   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6467 
6468   EVT LoVT, HiVT;
6469   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
6470 
6471   SDValue Chain = MSC->getChain();
6472 
6473   EVT MemoryVT = MSC->getMemoryVT();
6474   unsigned Alignment = MSC->getOriginalAlignment();
6475 
6476   EVT LoMemVT, HiMemVT;
6477   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6478 
6479   SDValue DataLo, DataHi;
6480   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6481 
6482   SDValue BasePtr = MSC->getBasePtr();
6483   SDValue IndexLo, IndexHi;
6484   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
6485 
6486   MachineMemOperand *MMO = DAG.getMachineFunction().
6487     getMachineMemOperand(MSC->getPointerInfo(),
6488                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6489                           Alignment, MSC->getAAInfo(), MSC->getRanges());
6490 
6491   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
6492   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
6493                             DL, OpsLo, MMO);
6494 
6495   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
6496   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
6497                             DL, OpsHi, MMO);
6498 
6499   AddToWorklist(Lo.getNode());
6500   AddToWorklist(Hi.getNode());
6501 
6502   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6503 }
6504 
6505 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
6506 
6507   if (Level >= AfterLegalizeTypes)
6508     return SDValue();
6509 
6510   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
6511   SDValue Mask = MST->getMask();
6512   SDValue Data  = MST->getValue();
6513   EVT VT = Data.getValueType();
6514   SDLoc DL(N);
6515 
6516   // If the MSTORE data type requires splitting and the mask is provided by a
6517   // SETCC, then split both nodes and its operands before legalization. This
6518   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6519   // and enables future optimizations (e.g. min/max pattern matching on X86).
6520   if (Mask.getOpcode() == ISD::SETCC) {
6521 
6522     // Check if any splitting is required.
6523     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6524         TargetLowering::TypeSplitVector)
6525       return SDValue();
6526 
6527     SDValue MaskLo, MaskHi, Lo, Hi;
6528     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6529 
6530     SDValue Chain = MST->getChain();
6531     SDValue Ptr   = MST->getBasePtr();
6532 
6533     EVT MemoryVT = MST->getMemoryVT();
6534     unsigned Alignment = MST->getOriginalAlignment();
6535 
6536     // if Alignment is equal to the vector size,
6537     // take the half of it for the second part
6538     unsigned SecondHalfAlignment =
6539       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
6540 
6541     EVT LoMemVT, HiMemVT;
6542     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6543 
6544     SDValue DataLo, DataHi;
6545     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6546 
6547     MachineMemOperand *MMO = DAG.getMachineFunction().
6548       getMachineMemOperand(MST->getPointerInfo(),
6549                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6550                            Alignment, MST->getAAInfo(), MST->getRanges());
6551 
6552     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
6553                             MST->isTruncatingStore(),
6554                             MST->isCompressingStore());
6555 
6556     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6557                                      MST->isCompressingStore());
6558 
6559     MMO = DAG.getMachineFunction().
6560       getMachineMemOperand(MST->getPointerInfo(),
6561                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
6562                            SecondHalfAlignment, MST->getAAInfo(),
6563                            MST->getRanges());
6564 
6565     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
6566                             MST->isTruncatingStore(),
6567                             MST->isCompressingStore());
6568 
6569     AddToWorklist(Lo.getNode());
6570     AddToWorklist(Hi.getNode());
6571 
6572     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6573   }
6574   return SDValue();
6575 }
6576 
6577 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
6578 
6579   if (Level >= AfterLegalizeTypes)
6580     return SDValue();
6581 
6582   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
6583   SDValue Mask = MGT->getMask();
6584   SDLoc DL(N);
6585 
6586   // If the MGATHER result requires splitting and the mask is provided by a
6587   // SETCC, then split both nodes and its operands before legalization. This
6588   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6589   // and enables future optimizations (e.g. min/max pattern matching on X86).
6590 
6591   if (Mask.getOpcode() != ISD::SETCC)
6592     return SDValue();
6593 
6594   EVT VT = N->getValueType(0);
6595 
6596   // Check if any splitting is required.
6597   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6598       TargetLowering::TypeSplitVector)
6599     return SDValue();
6600 
6601   SDValue MaskLo, MaskHi, Lo, Hi;
6602   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6603 
6604   SDValue Src0 = MGT->getValue();
6605   SDValue Src0Lo, Src0Hi;
6606   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6607 
6608   EVT LoVT, HiVT;
6609   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
6610 
6611   SDValue Chain = MGT->getChain();
6612   EVT MemoryVT = MGT->getMemoryVT();
6613   unsigned Alignment = MGT->getOriginalAlignment();
6614 
6615   EVT LoMemVT, HiMemVT;
6616   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6617 
6618   SDValue BasePtr = MGT->getBasePtr();
6619   SDValue Index = MGT->getIndex();
6620   SDValue IndexLo, IndexHi;
6621   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
6622 
6623   MachineMemOperand *MMO = DAG.getMachineFunction().
6624     getMachineMemOperand(MGT->getPointerInfo(),
6625                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6626                           Alignment, MGT->getAAInfo(), MGT->getRanges());
6627 
6628   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
6629   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
6630                             MMO);
6631 
6632   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
6633   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
6634                             MMO);
6635 
6636   AddToWorklist(Lo.getNode());
6637   AddToWorklist(Hi.getNode());
6638 
6639   // Build a factor node to remember that this load is independent of the
6640   // other one.
6641   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6642                       Hi.getValue(1));
6643 
6644   // Legalized the chain result - switch anything that used the old chain to
6645   // use the new one.
6646   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
6647 
6648   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6649 
6650   SDValue RetOps[] = { GatherRes, Chain };
6651   return DAG.getMergeValues(RetOps, DL);
6652 }
6653 
6654 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
6655 
6656   if (Level >= AfterLegalizeTypes)
6657     return SDValue();
6658 
6659   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
6660   SDValue Mask = MLD->getMask();
6661   SDLoc DL(N);
6662 
6663   // If the MLOAD result requires splitting and the mask is provided by a
6664   // SETCC, then split both nodes and its operands before legalization. This
6665   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6666   // and enables future optimizations (e.g. min/max pattern matching on X86).
6667 
6668   if (Mask.getOpcode() == ISD::SETCC) {
6669     EVT VT = N->getValueType(0);
6670 
6671     // Check if any splitting is required.
6672     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6673         TargetLowering::TypeSplitVector)
6674       return SDValue();
6675 
6676     SDValue MaskLo, MaskHi, Lo, Hi;
6677     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6678 
6679     SDValue Src0 = MLD->getSrc0();
6680     SDValue Src0Lo, Src0Hi;
6681     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6682 
6683     EVT LoVT, HiVT;
6684     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
6685 
6686     SDValue Chain = MLD->getChain();
6687     SDValue Ptr   = MLD->getBasePtr();
6688     EVT MemoryVT = MLD->getMemoryVT();
6689     unsigned Alignment = MLD->getOriginalAlignment();
6690 
6691     // if Alignment is equal to the vector size,
6692     // take the half of it for the second part
6693     unsigned SecondHalfAlignment =
6694       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
6695          Alignment/2 : Alignment;
6696 
6697     EVT LoMemVT, HiMemVT;
6698     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6699 
6700     MachineMemOperand *MMO = DAG.getMachineFunction().
6701     getMachineMemOperand(MLD->getPointerInfo(),
6702                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6703                          Alignment, MLD->getAAInfo(), MLD->getRanges());
6704 
6705     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
6706                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6707 
6708     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6709                                      MLD->isExpandingLoad());
6710 
6711     MMO = DAG.getMachineFunction().
6712     getMachineMemOperand(MLD->getPointerInfo(),
6713                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
6714                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
6715 
6716     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
6717                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6718 
6719     AddToWorklist(Lo.getNode());
6720     AddToWorklist(Hi.getNode());
6721 
6722     // Build a factor node to remember that this load is independent of the
6723     // other one.
6724     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6725                         Hi.getValue(1));
6726 
6727     // Legalized the chain result - switch anything that used the old chain to
6728     // use the new one.
6729     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
6730 
6731     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6732 
6733     SDValue RetOps[] = { LoadRes, Chain };
6734     return DAG.getMergeValues(RetOps, DL);
6735   }
6736   return SDValue();
6737 }
6738 
6739 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
6740   SDValue N0 = N->getOperand(0);
6741   SDValue N1 = N->getOperand(1);
6742   SDValue N2 = N->getOperand(2);
6743   SDLoc DL(N);
6744 
6745   // fold (vselect C, X, X) -> X
6746   if (N1 == N2)
6747     return N1;
6748 
6749   // Canonicalize integer abs.
6750   // vselect (setg[te] X,  0),  X, -X ->
6751   // vselect (setgt    X, -1),  X, -X ->
6752   // vselect (setl[te] X,  0), -X,  X ->
6753   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
6754   if (N0.getOpcode() == ISD::SETCC) {
6755     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6756     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6757     bool isAbs = false;
6758     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
6759 
6760     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
6761          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
6762         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
6763       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
6764     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
6765              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
6766       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6767 
6768     if (isAbs) {
6769       EVT VT = LHS.getValueType();
6770       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
6771         return DAG.getNode(ISD::ABS, DL, VT, LHS);
6772 
6773       SDValue Shift = DAG.getNode(
6774           ISD::SRA, DL, VT, LHS,
6775           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
6776       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
6777       AddToWorklist(Shift.getNode());
6778       AddToWorklist(Add.getNode());
6779       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
6780     }
6781   }
6782 
6783   if (SimplifySelectOps(N, N1, N2))
6784     return SDValue(N, 0);  // Don't revisit N.
6785 
6786   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
6787   if (ISD::isBuildVectorAllOnes(N0.getNode()))
6788     return N1;
6789   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
6790   if (ISD::isBuildVectorAllZeros(N0.getNode()))
6791     return N2;
6792 
6793   // The ConvertSelectToConcatVector function is assuming both the above
6794   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
6795   // and addressed.
6796   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
6797       N2.getOpcode() == ISD::CONCAT_VECTORS &&
6798       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
6799     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
6800       return CV;
6801   }
6802 
6803   return SDValue();
6804 }
6805 
6806 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
6807   SDValue N0 = N->getOperand(0);
6808   SDValue N1 = N->getOperand(1);
6809   SDValue N2 = N->getOperand(2);
6810   SDValue N3 = N->getOperand(3);
6811   SDValue N4 = N->getOperand(4);
6812   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
6813 
6814   // fold select_cc lhs, rhs, x, x, cc -> x
6815   if (N2 == N3)
6816     return N2;
6817 
6818   // Determine if the condition we're dealing with is constant
6819   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
6820                                   CC, SDLoc(N), false)) {
6821     AddToWorklist(SCC.getNode());
6822 
6823     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
6824       if (!SCCC->isNullValue())
6825         return N2;    // cond always true -> true val
6826       else
6827         return N3;    // cond always false -> false val
6828     } else if (SCC->isUndef()) {
6829       // When the condition is UNDEF, just return the first operand. This is
6830       // coherent the DAG creation, no setcc node is created in this case
6831       return N2;
6832     } else if (SCC.getOpcode() == ISD::SETCC) {
6833       // Fold to a simpler select_cc
6834       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
6835                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
6836                          SCC.getOperand(2));
6837     }
6838   }
6839 
6840   // If we can fold this based on the true/false value, do so.
6841   if (SimplifySelectOps(N, N2, N3))
6842     return SDValue(N, 0);  // Don't revisit N.
6843 
6844   // fold select_cc into other things, such as min/max/abs
6845   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
6846 }
6847 
6848 SDValue DAGCombiner::visitSETCC(SDNode *N) {
6849   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
6850                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
6851                        SDLoc(N));
6852 }
6853 
6854 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
6855   SDValue LHS = N->getOperand(0);
6856   SDValue RHS = N->getOperand(1);
6857   SDValue Carry = N->getOperand(2);
6858   SDValue Cond = N->getOperand(3);
6859 
6860   // If Carry is false, fold to a regular SETCC.
6861   if (Carry.getOpcode() == ISD::CARRY_FALSE)
6862     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6863 
6864   return SDValue();
6865 }
6866 
6867 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
6868   SDValue LHS = N->getOperand(0);
6869   SDValue RHS = N->getOperand(1);
6870   SDValue Carry = N->getOperand(2);
6871   SDValue Cond = N->getOperand(3);
6872 
6873   // If Carry is false, fold to a regular SETCC.
6874   if (isNullConstant(Carry))
6875     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6876 
6877   return SDValue();
6878 }
6879 
6880 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
6881 /// a build_vector of constants.
6882 /// This function is called by the DAGCombiner when visiting sext/zext/aext
6883 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
6884 /// Vector extends are not folded if operations are legal; this is to
6885 /// avoid introducing illegal build_vector dag nodes.
6886 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
6887                                          SelectionDAG &DAG, bool LegalTypes,
6888                                          bool LegalOperations) {
6889   unsigned Opcode = N->getOpcode();
6890   SDValue N0 = N->getOperand(0);
6891   EVT VT = N->getValueType(0);
6892 
6893   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
6894          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6895          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
6896          && "Expected EXTEND dag node in input!");
6897 
6898   // fold (sext c1) -> c1
6899   // fold (zext c1) -> c1
6900   // fold (aext c1) -> c1
6901   if (isa<ConstantSDNode>(N0))
6902     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
6903 
6904   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
6905   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
6906   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
6907   EVT SVT = VT.getScalarType();
6908   if (!(VT.isVector() &&
6909       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
6910       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
6911     return nullptr;
6912 
6913   // We can fold this node into a build_vector.
6914   unsigned VTBits = SVT.getSizeInBits();
6915   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
6916   SmallVector<SDValue, 8> Elts;
6917   unsigned NumElts = VT.getVectorNumElements();
6918   SDLoc DL(N);
6919 
6920   for (unsigned i=0; i != NumElts; ++i) {
6921     SDValue Op = N0->getOperand(i);
6922     if (Op->isUndef()) {
6923       Elts.push_back(DAG.getUNDEF(SVT));
6924       continue;
6925     }
6926 
6927     SDLoc DL(Op);
6928     // Get the constant value and if needed trunc it to the size of the type.
6929     // Nodes like build_vector might have constants wider than the scalar type.
6930     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
6931     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
6932       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
6933     else
6934       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
6935   }
6936 
6937   return DAG.getBuildVector(VT, DL, Elts).getNode();
6938 }
6939 
6940 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
6941 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
6942 // transformation. Returns true if extension are possible and the above
6943 // mentioned transformation is profitable.
6944 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
6945                                     unsigned ExtOpc,
6946                                     SmallVectorImpl<SDNode *> &ExtendNodes,
6947                                     const TargetLowering &TLI) {
6948   bool HasCopyToRegUses = false;
6949   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
6950   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
6951                             UE = N0.getNode()->use_end();
6952        UI != UE; ++UI) {
6953     SDNode *User = *UI;
6954     if (User == N)
6955       continue;
6956     if (UI.getUse().getResNo() != N0.getResNo())
6957       continue;
6958     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
6959     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
6960       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
6961       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
6962         // Sign bits will be lost after a zext.
6963         return false;
6964       bool Add = false;
6965       for (unsigned i = 0; i != 2; ++i) {
6966         SDValue UseOp = User->getOperand(i);
6967         if (UseOp == N0)
6968           continue;
6969         if (!isa<ConstantSDNode>(UseOp))
6970           return false;
6971         Add = true;
6972       }
6973       if (Add)
6974         ExtendNodes.push_back(User);
6975       continue;
6976     }
6977     // If truncates aren't free and there are users we can't
6978     // extend, it isn't worthwhile.
6979     if (!isTruncFree)
6980       return false;
6981     // Remember if this value is live-out.
6982     if (User->getOpcode() == ISD::CopyToReg)
6983       HasCopyToRegUses = true;
6984   }
6985 
6986   if (HasCopyToRegUses) {
6987     bool BothLiveOut = false;
6988     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6989          UI != UE; ++UI) {
6990       SDUse &Use = UI.getUse();
6991       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
6992         BothLiveOut = true;
6993         break;
6994       }
6995     }
6996     if (BothLiveOut)
6997       // Both unextended and extended values are live out. There had better be
6998       // a good reason for the transformation.
6999       return ExtendNodes.size();
7000   }
7001   return true;
7002 }
7003 
7004 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
7005                                   SDValue Trunc, SDValue ExtLoad,
7006                                   const SDLoc &DL, ISD::NodeType ExtType) {
7007   // Extend SetCC uses if necessary.
7008   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
7009     SDNode *SetCC = SetCCs[i];
7010     SmallVector<SDValue, 4> Ops;
7011 
7012     for (unsigned j = 0; j != 2; ++j) {
7013       SDValue SOp = SetCC->getOperand(j);
7014       if (SOp == Trunc)
7015         Ops.push_back(ExtLoad);
7016       else
7017         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
7018     }
7019 
7020     Ops.push_back(SetCC->getOperand(2));
7021     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
7022   }
7023 }
7024 
7025 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
7026 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
7027   SDValue N0 = N->getOperand(0);
7028   EVT DstVT = N->getValueType(0);
7029   EVT SrcVT = N0.getValueType();
7030 
7031   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
7032           N->getOpcode() == ISD::ZERO_EXTEND) &&
7033          "Unexpected node type (not an extend)!");
7034 
7035   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
7036   // For example, on a target with legal v4i32, but illegal v8i32, turn:
7037   //   (v8i32 (sext (v8i16 (load x))))
7038   // into:
7039   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
7040   //                          (v4i32 (sextload (x + 16)))))
7041   // Where uses of the original load, i.e.:
7042   //   (v8i16 (load x))
7043   // are replaced with:
7044   //   (v8i16 (truncate
7045   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
7046   //                            (v4i32 (sextload (x + 16)))))))
7047   //
7048   // This combine is only applicable to illegal, but splittable, vectors.
7049   // All legal types, and illegal non-vector types, are handled elsewhere.
7050   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
7051   //
7052   if (N0->getOpcode() != ISD::LOAD)
7053     return SDValue();
7054 
7055   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7056 
7057   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
7058       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
7059       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
7060     return SDValue();
7061 
7062   SmallVector<SDNode *, 4> SetCCs;
7063   if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
7064     return SDValue();
7065 
7066   ISD::LoadExtType ExtType =
7067       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
7068 
7069   // Try to split the vector types to get down to legal types.
7070   EVT SplitSrcVT = SrcVT;
7071   EVT SplitDstVT = DstVT;
7072   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
7073          SplitSrcVT.getVectorNumElements() > 1) {
7074     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
7075     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
7076   }
7077 
7078   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
7079     return SDValue();
7080 
7081   SDLoc DL(N);
7082   const unsigned NumSplits =
7083       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
7084   const unsigned Stride = SplitSrcVT.getStoreSize();
7085   SmallVector<SDValue, 4> Loads;
7086   SmallVector<SDValue, 4> Chains;
7087 
7088   SDValue BasePtr = LN0->getBasePtr();
7089   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
7090     const unsigned Offset = Idx * Stride;
7091     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
7092 
7093     SDValue SplitLoad = DAG.getExtLoad(
7094         ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
7095         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
7096         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7097 
7098     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
7099                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
7100 
7101     Loads.push_back(SplitLoad.getValue(0));
7102     Chains.push_back(SplitLoad.getValue(1));
7103   }
7104 
7105   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
7106   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
7107 
7108   // Simplify TF.
7109   AddToWorklist(NewChain.getNode());
7110 
7111   CombineTo(N, NewValue);
7112 
7113   // Replace uses of the original load (before extension)
7114   // with a truncate of the concatenated sextloaded vectors.
7115   SDValue Trunc =
7116       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
7117   CombineTo(N0.getNode(), Trunc, NewChain);
7118   ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
7119                   (ISD::NodeType)N->getOpcode());
7120   return SDValue(N, 0); // Return N so it doesn't get rechecked!
7121 }
7122 
7123 /// If we're narrowing or widening the result of a vector select and the final
7124 /// size is the same size as a setcc (compare) feeding the select, then try to
7125 /// apply the cast operation to the select's operands because matching vector
7126 /// sizes for a select condition and other operands should be more efficient.
7127 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
7128   unsigned CastOpcode = Cast->getOpcode();
7129   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
7130           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
7131           CastOpcode == ISD::FP_ROUND) &&
7132          "Unexpected opcode for vector select narrowing/widening");
7133 
7134   // We only do this transform before legal ops because the pattern may be
7135   // obfuscated by target-specific operations after legalization. Do not create
7136   // an illegal select op, however, because that may be difficult to lower.
7137   EVT VT = Cast->getValueType(0);
7138   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
7139     return SDValue();
7140 
7141   SDValue VSel = Cast->getOperand(0);
7142   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
7143       VSel.getOperand(0).getOpcode() != ISD::SETCC)
7144     return SDValue();
7145 
7146   // Does the setcc have the same vector size as the casted select?
7147   SDValue SetCC = VSel.getOperand(0);
7148   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
7149   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
7150     return SDValue();
7151 
7152   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
7153   SDValue A = VSel.getOperand(1);
7154   SDValue B = VSel.getOperand(2);
7155   SDValue CastA, CastB;
7156   SDLoc DL(Cast);
7157   if (CastOpcode == ISD::FP_ROUND) {
7158     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
7159     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
7160     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
7161   } else {
7162     CastA = DAG.getNode(CastOpcode, DL, VT, A);
7163     CastB = DAG.getNode(CastOpcode, DL, VT, B);
7164   }
7165   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
7166 }
7167 
7168 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
7169   SDValue N0 = N->getOperand(0);
7170   EVT VT = N->getValueType(0);
7171   SDLoc DL(N);
7172 
7173   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7174                                               LegalOperations))
7175     return SDValue(Res, 0);
7176 
7177   // fold (sext (sext x)) -> (sext x)
7178   // fold (sext (aext x)) -> (sext x)
7179   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7180     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
7181 
7182   if (N0.getOpcode() == ISD::TRUNCATE) {
7183     // fold (sext (truncate (load x))) -> (sext (smaller load x))
7184     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
7185     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7186       SDNode *oye = N0.getOperand(0).getNode();
7187       if (NarrowLoad.getNode() != N0.getNode()) {
7188         CombineTo(N0.getNode(), NarrowLoad);
7189         // CombineTo deleted the truncate, if needed, but not what's under it.
7190         AddToWorklist(oye);
7191       }
7192       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7193     }
7194 
7195     // See if the value being truncated is already sign extended.  If so, just
7196     // eliminate the trunc/sext pair.
7197     SDValue Op = N0.getOperand(0);
7198     unsigned OpBits   = Op.getScalarValueSizeInBits();
7199     unsigned MidBits  = N0.getScalarValueSizeInBits();
7200     unsigned DestBits = VT.getScalarSizeInBits();
7201     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
7202 
7203     if (OpBits == DestBits) {
7204       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
7205       // bits, it is already ready.
7206       if (NumSignBits > DestBits-MidBits)
7207         return Op;
7208     } else if (OpBits < DestBits) {
7209       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
7210       // bits, just sext from i32.
7211       if (NumSignBits > OpBits-MidBits)
7212         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
7213     } else {
7214       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
7215       // bits, just truncate to i32.
7216       if (NumSignBits > OpBits-MidBits)
7217         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7218     }
7219 
7220     // fold (sext (truncate x)) -> (sextinreg x).
7221     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
7222                                                  N0.getValueType())) {
7223       if (OpBits < DestBits)
7224         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
7225       else if (OpBits > DestBits)
7226         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
7227       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7228                          DAG.getValueType(N0.getValueType()));
7229     }
7230   }
7231 
7232   // fold (sext (load x)) -> (sext (truncate (sextload x)))
7233   // Only generate vector extloads when 1) they're legal, and 2) they are
7234   // deemed desirable by the target.
7235   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7236       ((!LegalOperations && !VT.isVector() &&
7237         !cast<LoadSDNode>(N0)->isVolatile()) ||
7238        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
7239     bool DoXform = true;
7240     SmallVector<SDNode*, 4> SetCCs;
7241     if (!N0.hasOneUse())
7242       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
7243     if (VT.isVector())
7244       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7245     if (DoXform) {
7246       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7247       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7248                                        LN0->getBasePtr(), N0.getValueType(),
7249                                        LN0->getMemOperand());
7250       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7251                                   N0.getValueType(), ExtLoad);
7252       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
7253       // If the load value is used only by N, replace it via CombineTo N.
7254       bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7255       CombineTo(N, ExtLoad);
7256       if (NoReplaceTrunc)
7257         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7258       else
7259         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7260       return SDValue(N, 0);
7261     }
7262   }
7263 
7264   // fold (sext (load x)) to multiple smaller sextloads.
7265   // Only on illegal but splittable vectors.
7266   if (SDValue ExtLoad = CombineExtLoad(N))
7267     return ExtLoad;
7268 
7269   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
7270   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
7271   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7272       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7273     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7274     EVT MemVT = LN0->getMemoryVT();
7275     if ((!LegalOperations && !LN0->isVolatile()) ||
7276         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
7277       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7278                                        LN0->getBasePtr(), MemVT,
7279                                        LN0->getMemOperand());
7280       CombineTo(N, ExtLoad);
7281       CombineTo(N0.getNode(),
7282                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7283                             N0.getValueType(), ExtLoad),
7284                 ExtLoad.getValue(1));
7285       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7286     }
7287   }
7288 
7289   // fold (sext (and/or/xor (load x), cst)) ->
7290   //      (and/or/xor (sextload x), (sext cst))
7291   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7292        N0.getOpcode() == ISD::XOR) &&
7293       isa<LoadSDNode>(N0.getOperand(0)) &&
7294       N0.getOperand(1).getOpcode() == ISD::Constant &&
7295       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
7296       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7297     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7298     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
7299       bool DoXform = true;
7300       SmallVector<SDNode*, 4> SetCCs;
7301       if (!N0.hasOneUse())
7302         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
7303                                           SetCCs, TLI);
7304       if (DoXform) {
7305         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
7306                                          LN0->getChain(), LN0->getBasePtr(),
7307                                          LN0->getMemoryVT(),
7308                                          LN0->getMemOperand());
7309         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7310         Mask = Mask.sext(VT.getSizeInBits());
7311         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7312                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7313         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7314                                     SDLoc(N0.getOperand(0)),
7315                                     N0.getOperand(0).getValueType(), ExtLoad);
7316         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
7317         bool NoReplaceTruncAnd = !N0.hasOneUse();
7318         bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7319         CombineTo(N, And);
7320         // If N0 has multiple uses, change other uses as well.
7321         if (NoReplaceTruncAnd) {
7322           SDValue TruncAnd =
7323               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
7324           CombineTo(N0.getNode(), TruncAnd);
7325         }
7326         if (NoReplaceTrunc)
7327           DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7328         else
7329           CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7330         return SDValue(N,0); // Return N so it doesn't get rechecked!
7331       }
7332     }
7333   }
7334 
7335   if (N0.getOpcode() == ISD::SETCC) {
7336     SDValue N00 = N0.getOperand(0);
7337     SDValue N01 = N0.getOperand(1);
7338     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7339     EVT N00VT = N0.getOperand(0).getValueType();
7340 
7341     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
7342     // Only do this before legalize for now.
7343     if (VT.isVector() && !LegalOperations &&
7344         TLI.getBooleanContents(N00VT) ==
7345             TargetLowering::ZeroOrNegativeOneBooleanContent) {
7346       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
7347       // of the same size as the compared operands. Only optimize sext(setcc())
7348       // if this is the case.
7349       EVT SVT = getSetCCResultType(N00VT);
7350 
7351       // We know that the # elements of the results is the same as the
7352       // # elements of the compare (and the # elements of the compare result
7353       // for that matter).  Check to see that they are the same size.  If so,
7354       // we know that the element size of the sext'd result matches the
7355       // element size of the compare operands.
7356       if (VT.getSizeInBits() == SVT.getSizeInBits())
7357         return DAG.getSetCC(DL, VT, N00, N01, CC);
7358 
7359       // If the desired elements are smaller or larger than the source
7360       // elements, we can use a matching integer vector type and then
7361       // truncate/sign extend.
7362       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
7363       if (SVT == MatchingVecType) {
7364         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
7365         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
7366       }
7367     }
7368 
7369     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
7370     // Here, T can be 1 or -1, depending on the type of the setcc and
7371     // getBooleanContents().
7372     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
7373 
7374     // To determine the "true" side of the select, we need to know the high bit
7375     // of the value returned by the setcc if it evaluates to true.
7376     // If the type of the setcc is i1, then the true case of the select is just
7377     // sext(i1 1), that is, -1.
7378     // If the type of the setcc is larger (say, i8) then the value of the high
7379     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
7380     // of the appropriate width.
7381     SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
7382                                            : TLI.getConstTrueVal(DAG, VT, DL);
7383     SDValue Zero = DAG.getConstant(0, DL, VT);
7384     if (SDValue SCC =
7385             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
7386       return SCC;
7387 
7388     if (!VT.isVector()) {
7389       EVT SetCCVT = getSetCCResultType(N00VT);
7390       // Don't do this transform for i1 because there's a select transform
7391       // that would reverse it.
7392       // TODO: We should not do this transform at all without a target hook
7393       // because a sext is likely cheaper than a select?
7394       if (SetCCVT.getScalarSizeInBits() != 1 &&
7395           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
7396         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
7397         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
7398       }
7399     }
7400   }
7401 
7402   // fold (sext x) -> (zext x) if the sign bit is known zero.
7403   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
7404       DAG.SignBitIsZero(N0))
7405     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
7406 
7407   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7408     return NewVSel;
7409 
7410   return SDValue();
7411 }
7412 
7413 // isTruncateOf - If N is a truncate of some other value, return true, record
7414 // the value being truncated in Op and which of Op's bits are zero/one in Known.
7415 // This function computes KnownBits to avoid a duplicated call to
7416 // computeKnownBits in the caller.
7417 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
7418                          KnownBits &Known) {
7419   if (N->getOpcode() == ISD::TRUNCATE) {
7420     Op = N->getOperand(0);
7421     DAG.computeKnownBits(Op, Known);
7422     return true;
7423   }
7424 
7425   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
7426       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
7427     return false;
7428 
7429   SDValue Op0 = N->getOperand(0);
7430   SDValue Op1 = N->getOperand(1);
7431   assert(Op0.getValueType() == Op1.getValueType());
7432 
7433   if (isNullConstant(Op0))
7434     Op = Op1;
7435   else if (isNullConstant(Op1))
7436     Op = Op0;
7437   else
7438     return false;
7439 
7440   DAG.computeKnownBits(Op, Known);
7441 
7442   if (!(Known.Zero | 1).isAllOnesValue())
7443     return false;
7444 
7445   return true;
7446 }
7447 
7448 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
7449   SDValue N0 = N->getOperand(0);
7450   EVT VT = N->getValueType(0);
7451 
7452   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7453                                               LegalOperations))
7454     return SDValue(Res, 0);
7455 
7456   // fold (zext (zext x)) -> (zext x)
7457   // fold (zext (aext x)) -> (zext x)
7458   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7459     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
7460                        N0.getOperand(0));
7461 
7462   // fold (zext (truncate x)) -> (zext x) or
7463   //      (zext (truncate x)) -> (truncate x)
7464   // This is valid when the truncated bits of x are already zero.
7465   // FIXME: We should extend this to work for vectors too.
7466   SDValue Op;
7467   KnownBits Known;
7468   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
7469     APInt TruncatedBits =
7470       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
7471       APInt(Op.getValueSizeInBits(), 0) :
7472       APInt::getBitsSet(Op.getValueSizeInBits(),
7473                         N0.getValueSizeInBits(),
7474                         std::min(Op.getValueSizeInBits(),
7475                                  VT.getSizeInBits()));
7476     if (TruncatedBits.isSubsetOf(Known.Zero))
7477       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7478   }
7479 
7480   // fold (zext (truncate (load x))) -> (zext (smaller load x))
7481   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
7482   if (N0.getOpcode() == ISD::TRUNCATE) {
7483     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7484       SDNode *oye = N0.getOperand(0).getNode();
7485       if (NarrowLoad.getNode() != N0.getNode()) {
7486         CombineTo(N0.getNode(), NarrowLoad);
7487         // CombineTo deleted the truncate, if needed, but not what's under it.
7488         AddToWorklist(oye);
7489       }
7490       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7491     }
7492   }
7493 
7494   // fold (zext (truncate x)) -> (and x, mask)
7495   if (N0.getOpcode() == ISD::TRUNCATE) {
7496     // fold (zext (truncate (load x))) -> (zext (smaller load x))
7497     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
7498     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7499       SDNode *oye = N0.getOperand(0).getNode();
7500       if (NarrowLoad.getNode() != N0.getNode()) {
7501         CombineTo(N0.getNode(), NarrowLoad);
7502         // CombineTo deleted the truncate, if needed, but not what's under it.
7503         AddToWorklist(oye);
7504       }
7505       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7506     }
7507 
7508     EVT SrcVT = N0.getOperand(0).getValueType();
7509     EVT MinVT = N0.getValueType();
7510 
7511     // Try to mask before the extension to avoid having to generate a larger mask,
7512     // possibly over several sub-vectors.
7513     if (SrcVT.bitsLT(VT)) {
7514       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
7515                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
7516         SDValue Op = N0.getOperand(0);
7517         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7518         AddToWorklist(Op.getNode());
7519         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7520       }
7521     }
7522 
7523     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
7524       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
7525       AddToWorklist(Op.getNode());
7526       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7527     }
7528   }
7529 
7530   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
7531   // if either of the casts is not free.
7532   if (N0.getOpcode() == ISD::AND &&
7533       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7534       N0.getOperand(1).getOpcode() == ISD::Constant &&
7535       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7536                            N0.getValueType()) ||
7537        !TLI.isZExtFree(N0.getValueType(), VT))) {
7538     SDValue X = N0.getOperand(0).getOperand(0);
7539     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
7540     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7541     Mask = Mask.zext(VT.getSizeInBits());
7542     SDLoc DL(N);
7543     return DAG.getNode(ISD::AND, DL, VT,
7544                        X, DAG.getConstant(Mask, DL, VT));
7545   }
7546 
7547   // fold (zext (load x)) -> (zext (truncate (zextload x)))
7548   // Only generate vector extloads when 1) they're legal, and 2) they are
7549   // deemed desirable by the target.
7550   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7551       ((!LegalOperations && !VT.isVector() &&
7552         !cast<LoadSDNode>(N0)->isVolatile()) ||
7553        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
7554     bool DoXform = true;
7555     SmallVector<SDNode*, 4> SetCCs;
7556     if (!N0.hasOneUse())
7557       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
7558     if (VT.isVector())
7559       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7560     if (DoXform) {
7561       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7562       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7563                                        LN0->getChain(),
7564                                        LN0->getBasePtr(), N0.getValueType(),
7565                                        LN0->getMemOperand());
7566 
7567       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7568                                   N0.getValueType(), ExtLoad);
7569       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
7570       // If the load value is used only by N, replace it via CombineTo N.
7571       bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7572       CombineTo(N, ExtLoad);
7573       if (NoReplaceTrunc)
7574         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7575       else
7576         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7577       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7578     }
7579   }
7580 
7581   // fold (zext (load x)) to multiple smaller zextloads.
7582   // Only on illegal but splittable vectors.
7583   if (SDValue ExtLoad = CombineExtLoad(N))
7584     return ExtLoad;
7585 
7586   // fold (zext (and/or/xor (load x), cst)) ->
7587   //      (and/or/xor (zextload x), (zext cst))
7588   // Unless (and (load x) cst) will match as a zextload already and has
7589   // additional users.
7590   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7591        N0.getOpcode() == ISD::XOR) &&
7592       isa<LoadSDNode>(N0.getOperand(0)) &&
7593       N0.getOperand(1).getOpcode() == ISD::Constant &&
7594       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
7595       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7596     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7597     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
7598       bool DoXform = true;
7599       SmallVector<SDNode*, 4> SetCCs;
7600       if (!N0.hasOneUse()) {
7601         if (N0.getOpcode() == ISD::AND) {
7602           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
7603           auto NarrowLoad = false;
7604           EVT LoadResultTy = AndC->getValueType(0);
7605           EVT ExtVT, LoadedVT;
7606           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
7607                                NarrowLoad))
7608             DoXform = false;
7609         }
7610         if (DoXform)
7611           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
7612                                             ISD::ZERO_EXTEND, SetCCs, TLI);
7613       }
7614       if (DoXform) {
7615         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
7616                                          LN0->getChain(), LN0->getBasePtr(),
7617                                          LN0->getMemoryVT(),
7618                                          LN0->getMemOperand());
7619         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7620         Mask = Mask.zext(VT.getSizeInBits());
7621         SDLoc DL(N);
7622         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7623                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7624         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7625                                     SDLoc(N0.getOperand(0)),
7626                                     N0.getOperand(0).getValueType(), ExtLoad);
7627         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
7628         bool NoReplaceTruncAnd = !N0.hasOneUse();
7629         bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7630         CombineTo(N, And);
7631         // If N0 has multiple uses, change other uses as well.
7632         if (NoReplaceTruncAnd) {
7633           SDValue TruncAnd =
7634               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
7635           CombineTo(N0.getNode(), TruncAnd);
7636         }
7637         if (NoReplaceTrunc)
7638           DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7639         else
7640           CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7641         return SDValue(N,0); // Return N so it doesn't get rechecked!
7642       }
7643     }
7644   }
7645 
7646   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
7647   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
7648   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7649       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7650     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7651     EVT MemVT = LN0->getMemoryVT();
7652     if ((!LegalOperations && !LN0->isVolatile()) ||
7653         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
7654       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7655                                        LN0->getChain(),
7656                                        LN0->getBasePtr(), MemVT,
7657                                        LN0->getMemOperand());
7658       CombineTo(N, ExtLoad);
7659       CombineTo(N0.getNode(),
7660                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
7661                             ExtLoad),
7662                 ExtLoad.getValue(1));
7663       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7664     }
7665   }
7666 
7667   if (N0.getOpcode() == ISD::SETCC) {
7668     // Only do this before legalize for now.
7669     if (!LegalOperations && VT.isVector() &&
7670         N0.getValueType().getVectorElementType() == MVT::i1) {
7671       EVT N00VT = N0.getOperand(0).getValueType();
7672       if (getSetCCResultType(N00VT) == N0.getValueType())
7673         return SDValue();
7674 
7675       // We know that the # elements of the results is the same as the #
7676       // elements of the compare (and the # elements of the compare result for
7677       // that matter). Check to see that they are the same size. If so, we know
7678       // that the element size of the sext'd result matches the element size of
7679       // the compare operands.
7680       SDLoc DL(N);
7681       SDValue VecOnes = DAG.getConstant(1, DL, VT);
7682       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
7683         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
7684         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
7685                                      N0.getOperand(1), N0.getOperand(2));
7686         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
7687       }
7688 
7689       // If the desired elements are smaller or larger than the source
7690       // elements we can use a matching integer vector type and then
7691       // truncate/sign extend.
7692       EVT MatchingElementType = EVT::getIntegerVT(
7693           *DAG.getContext(), N00VT.getScalarSizeInBits());
7694       EVT MatchingVectorType = EVT::getVectorVT(
7695           *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
7696       SDValue VsetCC =
7697           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
7698                       N0.getOperand(1), N0.getOperand(2));
7699       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
7700                          VecOnes);
7701     }
7702 
7703     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7704     SDLoc DL(N);
7705     if (SDValue SCC = SimplifySelectCC(
7706             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7707             DAG.getConstant(0, DL, VT),
7708             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7709       return SCC;
7710   }
7711 
7712   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
7713   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
7714       isa<ConstantSDNode>(N0.getOperand(1)) &&
7715       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
7716       N0.hasOneUse()) {
7717     SDValue ShAmt = N0.getOperand(1);
7718     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
7719     if (N0.getOpcode() == ISD::SHL) {
7720       SDValue InnerZExt = N0.getOperand(0);
7721       // If the original shl may be shifting out bits, do not perform this
7722       // transformation.
7723       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
7724         InnerZExt.getOperand(0).getValueSizeInBits();
7725       if (ShAmtVal > KnownZeroBits)
7726         return SDValue();
7727     }
7728 
7729     SDLoc DL(N);
7730 
7731     // Ensure that the shift amount is wide enough for the shifted value.
7732     if (VT.getSizeInBits() >= 256)
7733       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
7734 
7735     return DAG.getNode(N0.getOpcode(), DL, VT,
7736                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
7737                        ShAmt);
7738   }
7739 
7740   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7741     return NewVSel;
7742 
7743   return SDValue();
7744 }
7745 
7746 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
7747   SDValue N0 = N->getOperand(0);
7748   EVT VT = N->getValueType(0);
7749 
7750   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7751                                               LegalOperations))
7752     return SDValue(Res, 0);
7753 
7754   // fold (aext (aext x)) -> (aext x)
7755   // fold (aext (zext x)) -> (zext x)
7756   // fold (aext (sext x)) -> (sext x)
7757   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
7758       N0.getOpcode() == ISD::ZERO_EXTEND ||
7759       N0.getOpcode() == ISD::SIGN_EXTEND)
7760     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
7761 
7762   // fold (aext (truncate (load x))) -> (aext (smaller load x))
7763   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
7764   if (N0.getOpcode() == ISD::TRUNCATE) {
7765     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7766       SDNode *oye = N0.getOperand(0).getNode();
7767       if (NarrowLoad.getNode() != N0.getNode()) {
7768         CombineTo(N0.getNode(), NarrowLoad);
7769         // CombineTo deleted the truncate, if needed, but not what's under it.
7770         AddToWorklist(oye);
7771       }
7772       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7773     }
7774   }
7775 
7776   // fold (aext (truncate x))
7777   if (N0.getOpcode() == ISD::TRUNCATE)
7778     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
7779 
7780   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
7781   // if the trunc is not free.
7782   if (N0.getOpcode() == ISD::AND &&
7783       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7784       N0.getOperand(1).getOpcode() == ISD::Constant &&
7785       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7786                           N0.getValueType())) {
7787     SDLoc DL(N);
7788     SDValue X = N0.getOperand(0).getOperand(0);
7789     X = DAG.getAnyExtOrTrunc(X, DL, VT);
7790     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7791     Mask = Mask.zext(VT.getSizeInBits());
7792     return DAG.getNode(ISD::AND, DL, VT,
7793                        X, DAG.getConstant(Mask, DL, VT));
7794   }
7795 
7796   // fold (aext (load x)) -> (aext (truncate (extload x)))
7797   // None of the supported targets knows how to perform load and any_ext
7798   // on vectors in one instruction.  We only perform this transformation on
7799   // scalars.
7800   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
7801       ISD::isUNINDEXEDLoad(N0.getNode()) &&
7802       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
7803     bool DoXform = true;
7804     SmallVector<SDNode*, 4> SetCCs;
7805     if (!N0.hasOneUse())
7806       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
7807     if (DoXform) {
7808       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7809       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
7810                                        LN0->getChain(),
7811                                        LN0->getBasePtr(), N0.getValueType(),
7812                                        LN0->getMemOperand());
7813       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7814                                   N0.getValueType(), ExtLoad);
7815       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
7816                       ISD::ANY_EXTEND);
7817       // If the load value is used only by N, replace it via CombineTo N.
7818       bool NoReplaceTrunc = N0.hasOneUse();
7819       CombineTo(N, ExtLoad);
7820       if (NoReplaceTrunc)
7821         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7822       else
7823         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7824       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7825     }
7826   }
7827 
7828   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
7829   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
7830   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
7831   if (N0.getOpcode() == ISD::LOAD &&
7832       !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7833       N0.hasOneUse()) {
7834     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7835     ISD::LoadExtType ExtType = LN0->getExtensionType();
7836     EVT MemVT = LN0->getMemoryVT();
7837     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
7838       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
7839                                        VT, LN0->getChain(), LN0->getBasePtr(),
7840                                        MemVT, LN0->getMemOperand());
7841       CombineTo(N, ExtLoad);
7842       CombineTo(N0.getNode(),
7843                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7844                             N0.getValueType(), ExtLoad),
7845                 ExtLoad.getValue(1));
7846       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7847     }
7848   }
7849 
7850   if (N0.getOpcode() == ISD::SETCC) {
7851     // For vectors:
7852     // aext(setcc) -> vsetcc
7853     // aext(setcc) -> truncate(vsetcc)
7854     // aext(setcc) -> aext(vsetcc)
7855     // Only do this before legalize for now.
7856     if (VT.isVector() && !LegalOperations) {
7857       EVT N0VT = N0.getOperand(0).getValueType();
7858         // We know that the # elements of the results is the same as the
7859         // # elements of the compare (and the # elements of the compare result
7860         // for that matter).  Check to see that they are the same size.  If so,
7861         // we know that the element size of the sext'd result matches the
7862         // element size of the compare operands.
7863       if (VT.getSizeInBits() == N0VT.getSizeInBits())
7864         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
7865                              N0.getOperand(1),
7866                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
7867       // If the desired elements are smaller or larger than the source
7868       // elements we can use a matching integer vector type and then
7869       // truncate/any extend
7870       else {
7871         EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
7872         SDValue VsetCC =
7873           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
7874                         N0.getOperand(1),
7875                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
7876         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
7877       }
7878     }
7879 
7880     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7881     SDLoc DL(N);
7882     if (SDValue SCC = SimplifySelectCC(
7883             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7884             DAG.getConstant(0, DL, VT),
7885             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7886       return SCC;
7887   }
7888 
7889   return SDValue();
7890 }
7891 
7892 SDValue DAGCombiner::visitAssertZext(SDNode *N) {
7893   SDValue N0 = N->getOperand(0);
7894   SDValue N1 = N->getOperand(1);
7895   EVT EVT = cast<VTSDNode>(N1)->getVT();
7896 
7897   // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
7898   if (N0.getOpcode() == ISD::AssertZext &&
7899       EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
7900     return N0;
7901 
7902   return SDValue();
7903 }
7904 
7905 /// If the result of a wider load is shifted to right of N  bits and then
7906 /// truncated to a narrower type and where N is a multiple of number of bits of
7907 /// the narrower type, transform it to a narrower load from address + N / num of
7908 /// bits of new type. If the result is to be extended, also fold the extension
7909 /// to form a extending load.
7910 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
7911   unsigned Opc = N->getOpcode();
7912 
7913   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
7914   SDValue N0 = N->getOperand(0);
7915   EVT VT = N->getValueType(0);
7916   EVT ExtVT = VT;
7917 
7918   // This transformation isn't valid for vector loads.
7919   if (VT.isVector())
7920     return SDValue();
7921 
7922   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
7923   // extended to VT.
7924   if (Opc == ISD::SIGN_EXTEND_INREG) {
7925     ExtType = ISD::SEXTLOAD;
7926     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7927   } else if (Opc == ISD::SRL) {
7928     // Another special-case: SRL is basically zero-extending a narrower value.
7929     ExtType = ISD::ZEXTLOAD;
7930     N0 = SDValue(N, 0);
7931     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7932     if (!N01) return SDValue();
7933     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
7934                               VT.getSizeInBits() - N01->getZExtValue());
7935   }
7936   if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
7937     return SDValue();
7938 
7939   unsigned EVTBits = ExtVT.getSizeInBits();
7940 
7941   // Do not generate loads of non-round integer types since these can
7942   // be expensive (and would be wrong if the type is not byte sized).
7943   if (!ExtVT.isRound())
7944     return SDValue();
7945 
7946   unsigned ShAmt = 0;
7947   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
7948     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7949       ShAmt = N01->getZExtValue();
7950       // Is the shift amount a multiple of size of VT?
7951       if ((ShAmt & (EVTBits-1)) == 0) {
7952         N0 = N0.getOperand(0);
7953         // Is the load width a multiple of size of VT?
7954         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
7955           return SDValue();
7956       }
7957 
7958       // At this point, we must have a load or else we can't do the transform.
7959       if (!isa<LoadSDNode>(N0)) return SDValue();
7960 
7961       // Because a SRL must be assumed to *need* to zero-extend the high bits
7962       // (as opposed to anyext the high bits), we can't combine the zextload
7963       // lowering of SRL and an sextload.
7964       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
7965         return SDValue();
7966 
7967       // If the shift amount is larger than the input type then we're not
7968       // accessing any of the loaded bytes.  If the load was a zextload/extload
7969       // then the result of the shift+trunc is zero/undef (handled elsewhere).
7970       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
7971         return SDValue();
7972     }
7973   }
7974 
7975   // If the load is shifted left (and the result isn't shifted back right),
7976   // we can fold the truncate through the shift.
7977   unsigned ShLeftAmt = 0;
7978   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
7979       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
7980     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7981       ShLeftAmt = N01->getZExtValue();
7982       N0 = N0.getOperand(0);
7983     }
7984   }
7985 
7986   // If we haven't found a load, we can't narrow it.  Don't transform one with
7987   // multiple uses, this would require adding a new load.
7988   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
7989     return SDValue();
7990 
7991   // Don't change the width of a volatile load.
7992   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7993   if (LN0->isVolatile())
7994     return SDValue();
7995 
7996   // Verify that we are actually reducing a load width here.
7997   if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
7998     return SDValue();
7999 
8000   // For the transform to be legal, the load must produce only two values
8001   // (the value loaded and the chain).  Don't transform a pre-increment
8002   // load, for example, which produces an extra value.  Otherwise the
8003   // transformation is not equivalent, and the downstream logic to replace
8004   // uses gets things wrong.
8005   if (LN0->getNumValues() > 2)
8006     return SDValue();
8007 
8008   // If the load that we're shrinking is an extload and we're not just
8009   // discarding the extension we can't simply shrink the load. Bail.
8010   // TODO: It would be possible to merge the extensions in some cases.
8011   if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
8012       LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
8013     return SDValue();
8014 
8015   if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
8016     return SDValue();
8017 
8018   EVT PtrType = N0.getOperand(1).getValueType();
8019 
8020   if (PtrType == MVT::Untyped || PtrType.isExtended())
8021     // It's not possible to generate a constant of extended or untyped type.
8022     return SDValue();
8023 
8024   // For big endian targets, we need to adjust the offset to the pointer to
8025   // load the correct bytes.
8026   if (DAG.getDataLayout().isBigEndian()) {
8027     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
8028     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
8029     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
8030   }
8031 
8032   uint64_t PtrOff = ShAmt / 8;
8033   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
8034   SDLoc DL(LN0);
8035   // The original load itself didn't wrap, so an offset within it doesn't.
8036   SDNodeFlags Flags;
8037   Flags.setNoUnsignedWrap(true);
8038   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
8039                                PtrType, LN0->getBasePtr(),
8040                                DAG.getConstant(PtrOff, DL, PtrType),
8041                                Flags);
8042   AddToWorklist(NewPtr.getNode());
8043 
8044   SDValue Load;
8045   if (ExtType == ISD::NON_EXTLOAD)
8046     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
8047                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8048                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8049   else
8050     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
8051                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
8052                           NewAlign, LN0->getMemOperand()->getFlags(),
8053                           LN0->getAAInfo());
8054 
8055   // Replace the old load's chain with the new load's chain.
8056   WorklistRemover DeadNodes(*this);
8057   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8058 
8059   // Shift the result left, if we've swallowed a left shift.
8060   SDValue Result = Load;
8061   if (ShLeftAmt != 0) {
8062     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
8063     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
8064       ShImmTy = VT;
8065     // If the shift amount is as large as the result size (but, presumably,
8066     // no larger than the source) then the useful bits of the result are
8067     // zero; we can't simply return the shortened shift, because the result
8068     // of that operation is undefined.
8069     SDLoc DL(N0);
8070     if (ShLeftAmt >= VT.getSizeInBits())
8071       Result = DAG.getConstant(0, DL, VT);
8072     else
8073       Result = DAG.getNode(ISD::SHL, DL, VT,
8074                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
8075   }
8076 
8077   // Return the new loaded value.
8078   return Result;
8079 }
8080 
8081 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
8082   SDValue N0 = N->getOperand(0);
8083   SDValue N1 = N->getOperand(1);
8084   EVT VT = N->getValueType(0);
8085   EVT EVT = cast<VTSDNode>(N1)->getVT();
8086   unsigned VTBits = VT.getScalarSizeInBits();
8087   unsigned EVTBits = EVT.getScalarSizeInBits();
8088 
8089   if (N0.isUndef())
8090     return DAG.getUNDEF(VT);
8091 
8092   // fold (sext_in_reg c1) -> c1
8093   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8094     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
8095 
8096   // If the input is already sign extended, just drop the extension.
8097   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
8098     return N0;
8099 
8100   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
8101   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
8102       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
8103     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8104                        N0.getOperand(0), N1);
8105 
8106   // fold (sext_in_reg (sext x)) -> (sext x)
8107   // fold (sext_in_reg (aext x)) -> (sext x)
8108   // if x is small enough.
8109   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
8110     SDValue N00 = N0.getOperand(0);
8111     if (N00.getScalarValueSizeInBits() <= EVTBits &&
8112         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8113       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8114   }
8115 
8116   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
8117   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
8118        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
8119        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
8120       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
8121     if (!LegalOperations ||
8122         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
8123       return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
8124   }
8125 
8126   // fold (sext_in_reg (zext x)) -> (sext x)
8127   // iff we are extending the source sign bit.
8128   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
8129     SDValue N00 = N0.getOperand(0);
8130     if (N00.getScalarValueSizeInBits() == EVTBits &&
8131         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8132       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8133   }
8134 
8135   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
8136   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
8137     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
8138 
8139   // fold operands of sext_in_reg based on knowledge that the top bits are not
8140   // demanded.
8141   if (SimplifyDemandedBits(SDValue(N, 0)))
8142     return SDValue(N, 0);
8143 
8144   // fold (sext_in_reg (load x)) -> (smaller sextload x)
8145   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
8146   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8147     return NarrowLoad;
8148 
8149   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
8150   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
8151   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
8152   if (N0.getOpcode() == ISD::SRL) {
8153     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
8154       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
8155         // We can turn this into an SRA iff the input to the SRL is already sign
8156         // extended enough.
8157         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
8158         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
8159           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
8160                              N0.getOperand(0), N0.getOperand(1));
8161       }
8162   }
8163 
8164   // fold (sext_inreg (extload x)) -> (sextload x)
8165   if (ISD::isEXTLoad(N0.getNode()) &&
8166       ISD::isUNINDEXEDLoad(N0.getNode()) &&
8167       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8168       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
8169        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8170     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8171     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8172                                      LN0->getChain(),
8173                                      LN0->getBasePtr(), EVT,
8174                                      LN0->getMemOperand());
8175     CombineTo(N, ExtLoad);
8176     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8177     AddToWorklist(ExtLoad.getNode());
8178     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8179   }
8180   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
8181   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
8182       N0.hasOneUse() &&
8183       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8184       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
8185        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8186     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8187     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8188                                      LN0->getChain(),
8189                                      LN0->getBasePtr(), EVT,
8190                                      LN0->getMemOperand());
8191     CombineTo(N, ExtLoad);
8192     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8193     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8194   }
8195 
8196   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
8197   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
8198     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
8199                                            N0.getOperand(1), false))
8200       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8201                          BSwap, N1);
8202   }
8203 
8204   return SDValue();
8205 }
8206 
8207 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
8208   SDValue N0 = N->getOperand(0);
8209   EVT VT = N->getValueType(0);
8210 
8211   if (N0.isUndef())
8212     return DAG.getUNDEF(VT);
8213 
8214   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8215                                               LegalOperations))
8216     return SDValue(Res, 0);
8217 
8218   return SDValue();
8219 }
8220 
8221 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
8222   SDValue N0 = N->getOperand(0);
8223   EVT VT = N->getValueType(0);
8224 
8225   if (N0.isUndef())
8226     return DAG.getUNDEF(VT);
8227 
8228   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8229                                               LegalOperations))
8230     return SDValue(Res, 0);
8231 
8232   return SDValue();
8233 }
8234 
8235 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
8236   SDValue N0 = N->getOperand(0);
8237   EVT VT = N->getValueType(0);
8238   bool isLE = DAG.getDataLayout().isLittleEndian();
8239 
8240   // noop truncate
8241   if (N0.getValueType() == N->getValueType(0))
8242     return N0;
8243   // fold (truncate c1) -> c1
8244   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8245     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
8246   // fold (truncate (truncate x)) -> (truncate x)
8247   if (N0.getOpcode() == ISD::TRUNCATE)
8248     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8249   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
8250   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
8251       N0.getOpcode() == ISD::SIGN_EXTEND ||
8252       N0.getOpcode() == ISD::ANY_EXTEND) {
8253     // if the source is smaller than the dest, we still need an extend.
8254     if (N0.getOperand(0).getValueType().bitsLT(VT))
8255       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8256     // if the source is larger than the dest, than we just need the truncate.
8257     if (N0.getOperand(0).getValueType().bitsGT(VT))
8258       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8259     // if the source and dest are the same type, we can drop both the extend
8260     // and the truncate.
8261     return N0.getOperand(0);
8262   }
8263 
8264   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
8265   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
8266     return SDValue();
8267 
8268   // Fold extract-and-trunc into a narrow extract. For example:
8269   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
8270   //   i32 y = TRUNCATE(i64 x)
8271   //        -- becomes --
8272   //   v16i8 b = BITCAST (v2i64 val)
8273   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
8274   //
8275   // Note: We only run this optimization after type legalization (which often
8276   // creates this pattern) and before operation legalization after which
8277   // we need to be more careful about the vector instructions that we generate.
8278   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8279       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
8280 
8281     EVT VecTy = N0.getOperand(0).getValueType();
8282     EVT ExTy = N0.getValueType();
8283     EVT TrTy = N->getValueType(0);
8284 
8285     unsigned NumElem = VecTy.getVectorNumElements();
8286     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
8287 
8288     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
8289     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
8290 
8291     SDValue EltNo = N0->getOperand(1);
8292     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
8293       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
8294       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
8295       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
8296 
8297       SDLoc DL(N);
8298       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
8299                          DAG.getBitcast(NVT, N0.getOperand(0)),
8300                          DAG.getConstant(Index, DL, IndexTy));
8301     }
8302   }
8303 
8304   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
8305   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
8306     EVT SrcVT = N0.getValueType();
8307     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
8308         TLI.isTruncateFree(SrcVT, VT)) {
8309       SDLoc SL(N0);
8310       SDValue Cond = N0.getOperand(0);
8311       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8312       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
8313       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
8314     }
8315   }
8316 
8317   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
8318   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
8319       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
8320       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
8321     SDValue Amt = N0.getOperand(1);
8322     KnownBits Known;
8323     DAG.computeKnownBits(Amt, Known);
8324     unsigned Size = VT.getScalarSizeInBits();
8325     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
8326       SDLoc SL(N);
8327       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
8328 
8329       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8330       if (AmtVT != Amt.getValueType()) {
8331         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
8332         AddToWorklist(Amt.getNode());
8333       }
8334       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
8335     }
8336   }
8337 
8338   // Fold a series of buildvector, bitcast, and truncate if possible.
8339   // For example fold
8340   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
8341   //   (2xi32 (buildvector x, y)).
8342   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
8343       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
8344       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
8345       N0.getOperand(0).hasOneUse()) {
8346 
8347     SDValue BuildVect = N0.getOperand(0);
8348     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
8349     EVT TruncVecEltTy = VT.getVectorElementType();
8350 
8351     // Check that the element types match.
8352     if (BuildVectEltTy == TruncVecEltTy) {
8353       // Now we only need to compute the offset of the truncated elements.
8354       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
8355       unsigned TruncVecNumElts = VT.getVectorNumElements();
8356       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
8357 
8358       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
8359              "Invalid number of elements");
8360 
8361       SmallVector<SDValue, 8> Opnds;
8362       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
8363         Opnds.push_back(BuildVect.getOperand(i));
8364 
8365       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
8366     }
8367   }
8368 
8369   // See if we can simplify the input to this truncate through knowledge that
8370   // only the low bits are being used.
8371   // For example "trunc (or (shl x, 8), y)" // -> trunc y
8372   // Currently we only perform this optimization on scalars because vectors
8373   // may have different active low bits.
8374   if (!VT.isVector()) {
8375     APInt Mask =
8376         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
8377     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
8378       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
8379   }
8380 
8381   // fold (truncate (load x)) -> (smaller load x)
8382   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
8383   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
8384     if (SDValue Reduced = ReduceLoadWidth(N))
8385       return Reduced;
8386 
8387     // Handle the case where the load remains an extending load even
8388     // after truncation.
8389     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
8390       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8391       if (!LN0->isVolatile() &&
8392           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
8393         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
8394                                          VT, LN0->getChain(), LN0->getBasePtr(),
8395                                          LN0->getMemoryVT(),
8396                                          LN0->getMemOperand());
8397         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
8398         return NewLoad;
8399       }
8400     }
8401   }
8402 
8403   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
8404   // where ... are all 'undef'.
8405   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
8406     SmallVector<EVT, 8> VTs;
8407     SDValue V;
8408     unsigned Idx = 0;
8409     unsigned NumDefs = 0;
8410 
8411     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
8412       SDValue X = N0.getOperand(i);
8413       if (!X.isUndef()) {
8414         V = X;
8415         Idx = i;
8416         NumDefs++;
8417       }
8418       // Stop if more than one members are non-undef.
8419       if (NumDefs > 1)
8420         break;
8421       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
8422                                      VT.getVectorElementType(),
8423                                      X.getValueType().getVectorNumElements()));
8424     }
8425 
8426     if (NumDefs == 0)
8427       return DAG.getUNDEF(VT);
8428 
8429     if (NumDefs == 1) {
8430       assert(V.getNode() && "The single defined operand is empty!");
8431       SmallVector<SDValue, 8> Opnds;
8432       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
8433         if (i != Idx) {
8434           Opnds.push_back(DAG.getUNDEF(VTs[i]));
8435           continue;
8436         }
8437         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
8438         AddToWorklist(NV.getNode());
8439         Opnds.push_back(NV);
8440       }
8441       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
8442     }
8443   }
8444 
8445   // Fold truncate of a bitcast of a vector to an extract of the low vector
8446   // element.
8447   //
8448   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
8449   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
8450     SDValue VecSrc = N0.getOperand(0);
8451     EVT SrcVT = VecSrc.getValueType();
8452     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
8453         (!LegalOperations ||
8454          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
8455       SDLoc SL(N);
8456 
8457       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
8458       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
8459       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
8460                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
8461     }
8462   }
8463 
8464   // Simplify the operands using demanded-bits information.
8465   if (!VT.isVector() &&
8466       SimplifyDemandedBits(SDValue(N, 0)))
8467     return SDValue(N, 0);
8468 
8469   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
8470   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
8471   // When the adde's carry is not used.
8472   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
8473       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
8474       (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
8475     SDLoc SL(N);
8476     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8477     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8478     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
8479     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
8480   }
8481 
8482   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8483     return NewVSel;
8484 
8485   return SDValue();
8486 }
8487 
8488 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
8489   SDValue Elt = N->getOperand(i);
8490   if (Elt.getOpcode() != ISD::MERGE_VALUES)
8491     return Elt.getNode();
8492   return Elt.getOperand(Elt.getResNo()).getNode();
8493 }
8494 
8495 /// build_pair (load, load) -> load
8496 /// if load locations are consecutive.
8497 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
8498   assert(N->getOpcode() == ISD::BUILD_PAIR);
8499 
8500   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
8501   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
8502   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
8503       LD1->getAddressSpace() != LD2->getAddressSpace())
8504     return SDValue();
8505   EVT LD1VT = LD1->getValueType(0);
8506   unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
8507   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
8508       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
8509     unsigned Align = LD1->getAlignment();
8510     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
8511         VT.getTypeForEVT(*DAG.getContext()));
8512 
8513     if (NewAlign <= Align &&
8514         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
8515       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
8516                          LD1->getPointerInfo(), Align);
8517   }
8518 
8519   return SDValue();
8520 }
8521 
8522 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
8523   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
8524   // and Lo parts; on big-endian machines it doesn't.
8525   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
8526 }
8527 
8528 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
8529                                     const TargetLowering &TLI) {
8530   // If this is not a bitcast to an FP type or if the target doesn't have
8531   // IEEE754-compliant FP logic, we're done.
8532   EVT VT = N->getValueType(0);
8533   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
8534     return SDValue();
8535 
8536   // TODO: Use splat values for the constant-checking below and remove this
8537   // restriction.
8538   SDValue N0 = N->getOperand(0);
8539   EVT SourceVT = N0.getValueType();
8540   if (SourceVT.isVector())
8541     return SDValue();
8542 
8543   unsigned FPOpcode;
8544   APInt SignMask;
8545   switch (N0.getOpcode()) {
8546   case ISD::AND:
8547     FPOpcode = ISD::FABS;
8548     SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
8549     break;
8550   case ISD::XOR:
8551     FPOpcode = ISD::FNEG;
8552     SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
8553     break;
8554   // TODO: ISD::OR --> ISD::FNABS?
8555   default:
8556     return SDValue();
8557   }
8558 
8559   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
8560   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
8561   SDValue LogicOp0 = N0.getOperand(0);
8562   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8563   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
8564       LogicOp0.getOpcode() == ISD::BITCAST &&
8565       LogicOp0->getOperand(0).getValueType() == VT)
8566     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
8567 
8568   return SDValue();
8569 }
8570 
8571 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
8572   SDValue N0 = N->getOperand(0);
8573   EVT VT = N->getValueType(0);
8574 
8575   if (N0.isUndef())
8576     return DAG.getUNDEF(VT);
8577 
8578   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
8579   // Only do this before legalize, since afterward the target may be depending
8580   // on the bitconvert.
8581   // First check to see if this is all constant.
8582   if (!LegalTypes &&
8583       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
8584       VT.isVector()) {
8585     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
8586 
8587     EVT DestEltVT = N->getValueType(0).getVectorElementType();
8588     assert(!DestEltVT.isVector() &&
8589            "Element type of vector ValueType must not be vector!");
8590     if (isSimple)
8591       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
8592   }
8593 
8594   // If the input is a constant, let getNode fold it.
8595   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
8596     // If we can't allow illegal operations, we need to check that this is just
8597     // a fp -> int or int -> conversion and that the resulting operation will
8598     // be legal.
8599     if (!LegalOperations ||
8600         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
8601          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
8602         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
8603          TLI.isOperationLegal(ISD::Constant, VT)))
8604       return DAG.getBitcast(VT, N0);
8605   }
8606 
8607   // (conv (conv x, t1), t2) -> (conv x, t2)
8608   if (N0.getOpcode() == ISD::BITCAST)
8609     return DAG.getBitcast(VT, N0.getOperand(0));
8610 
8611   // fold (conv (load x)) -> (load (conv*)x)
8612   // If the resultant load doesn't need a higher alignment than the original!
8613   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
8614       // Do not change the width of a volatile load.
8615       !cast<LoadSDNode>(N0)->isVolatile() &&
8616       // Do not remove the cast if the types differ in endian layout.
8617       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
8618           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
8619       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
8620       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
8621     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8622     unsigned OrigAlign = LN0->getAlignment();
8623 
8624     bool Fast = false;
8625     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8626                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
8627         Fast) {
8628       SDValue Load =
8629           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
8630                       LN0->getPointerInfo(), OrigAlign,
8631                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8632       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8633       return Load;
8634     }
8635   }
8636 
8637   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
8638     return V;
8639 
8640   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
8641   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
8642   //
8643   // For ppc_fp128:
8644   // fold (bitcast (fneg x)) ->
8645   //     flipbit = signbit
8646   //     (xor (bitcast x) (build_pair flipbit, flipbit))
8647   //
8648   // fold (bitcast (fabs x)) ->
8649   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
8650   //     (xor (bitcast x) (build_pair flipbit, flipbit))
8651   // This often reduces constant pool loads.
8652   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
8653        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
8654       N0.getNode()->hasOneUse() && VT.isInteger() &&
8655       !VT.isVector() && !N0.getValueType().isVector()) {
8656     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
8657     AddToWorklist(NewConv.getNode());
8658 
8659     SDLoc DL(N);
8660     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
8661       assert(VT.getSizeInBits() == 128);
8662       SDValue SignBit = DAG.getConstant(
8663           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
8664       SDValue FlipBit;
8665       if (N0.getOpcode() == ISD::FNEG) {
8666         FlipBit = SignBit;
8667         AddToWorklist(FlipBit.getNode());
8668       } else {
8669         assert(N0.getOpcode() == ISD::FABS);
8670         SDValue Hi =
8671             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
8672                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8673                                               SDLoc(NewConv)));
8674         AddToWorklist(Hi.getNode());
8675         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
8676         AddToWorklist(FlipBit.getNode());
8677       }
8678       SDValue FlipBits =
8679           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8680       AddToWorklist(FlipBits.getNode());
8681       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
8682     }
8683     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
8684     if (N0.getOpcode() == ISD::FNEG)
8685       return DAG.getNode(ISD::XOR, DL, VT,
8686                          NewConv, DAG.getConstant(SignBit, DL, VT));
8687     assert(N0.getOpcode() == ISD::FABS);
8688     return DAG.getNode(ISD::AND, DL, VT,
8689                        NewConv, DAG.getConstant(~SignBit, DL, VT));
8690   }
8691 
8692   // fold (bitconvert (fcopysign cst, x)) ->
8693   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
8694   // Note that we don't handle (copysign x, cst) because this can always be
8695   // folded to an fneg or fabs.
8696   //
8697   // For ppc_fp128:
8698   // fold (bitcast (fcopysign cst, x)) ->
8699   //     flipbit = (and (extract_element
8700   //                     (xor (bitcast cst), (bitcast x)), 0),
8701   //                    signbit)
8702   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
8703   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
8704       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
8705       VT.isInteger() && !VT.isVector()) {
8706     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
8707     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
8708     if (isTypeLegal(IntXVT)) {
8709       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
8710       AddToWorklist(X.getNode());
8711 
8712       // If X has a different width than the result/lhs, sext it or truncate it.
8713       unsigned VTWidth = VT.getSizeInBits();
8714       if (OrigXWidth < VTWidth) {
8715         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
8716         AddToWorklist(X.getNode());
8717       } else if (OrigXWidth > VTWidth) {
8718         // To get the sign bit in the right place, we have to shift it right
8719         // before truncating.
8720         SDLoc DL(X);
8721         X = DAG.getNode(ISD::SRL, DL,
8722                         X.getValueType(), X,
8723                         DAG.getConstant(OrigXWidth-VTWidth, DL,
8724                                         X.getValueType()));
8725         AddToWorklist(X.getNode());
8726         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
8727         AddToWorklist(X.getNode());
8728       }
8729 
8730       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
8731         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
8732         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
8733         AddToWorklist(Cst.getNode());
8734         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
8735         AddToWorklist(X.getNode());
8736         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
8737         AddToWorklist(XorResult.getNode());
8738         SDValue XorResult64 = DAG.getNode(
8739             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
8740             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8741                                   SDLoc(XorResult)));
8742         AddToWorklist(XorResult64.getNode());
8743         SDValue FlipBit =
8744             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
8745                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
8746         AddToWorklist(FlipBit.getNode());
8747         SDValue FlipBits =
8748             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8749         AddToWorklist(FlipBits.getNode());
8750         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
8751       }
8752       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
8753       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
8754                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
8755       AddToWorklist(X.getNode());
8756 
8757       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
8758       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
8759                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
8760       AddToWorklist(Cst.getNode());
8761 
8762       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
8763     }
8764   }
8765 
8766   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
8767   if (N0.getOpcode() == ISD::BUILD_PAIR)
8768     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
8769       return CombineLD;
8770 
8771   // Remove double bitcasts from shuffles - this is often a legacy of
8772   // XformToShuffleWithZero being used to combine bitmaskings (of
8773   // float vectors bitcast to integer vectors) into shuffles.
8774   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
8775   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
8776       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
8777       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
8778       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
8779     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
8780 
8781     // If operands are a bitcast, peek through if it casts the original VT.
8782     // If operands are a constant, just bitcast back to original VT.
8783     auto PeekThroughBitcast = [&](SDValue Op) {
8784       if (Op.getOpcode() == ISD::BITCAST &&
8785           Op.getOperand(0).getValueType() == VT)
8786         return SDValue(Op.getOperand(0));
8787       if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
8788           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
8789         return DAG.getBitcast(VT, Op);
8790       return SDValue();
8791     };
8792 
8793     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
8794     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
8795     if (!(SV0 && SV1))
8796       return SDValue();
8797 
8798     int MaskScale =
8799         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
8800     SmallVector<int, 8> NewMask;
8801     for (int M : SVN->getMask())
8802       for (int i = 0; i != MaskScale; ++i)
8803         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
8804 
8805     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
8806     if (!LegalMask) {
8807       std::swap(SV0, SV1);
8808       ShuffleVectorSDNode::commuteMask(NewMask);
8809       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
8810     }
8811 
8812     if (LegalMask)
8813       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
8814   }
8815 
8816   return SDValue();
8817 }
8818 
8819 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
8820   EVT VT = N->getValueType(0);
8821   return CombineConsecutiveLoads(N, VT);
8822 }
8823 
8824 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
8825 /// operands. DstEltVT indicates the destination element value type.
8826 SDValue DAGCombiner::
8827 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
8828   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
8829 
8830   // If this is already the right type, we're done.
8831   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
8832 
8833   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
8834   unsigned DstBitSize = DstEltVT.getSizeInBits();
8835 
8836   // If this is a conversion of N elements of one type to N elements of another
8837   // type, convert each element.  This handles FP<->INT cases.
8838   if (SrcBitSize == DstBitSize) {
8839     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8840                               BV->getValueType(0).getVectorNumElements());
8841 
8842     // Due to the FP element handling below calling this routine recursively,
8843     // we can end up with a scalar-to-vector node here.
8844     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
8845       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
8846                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
8847 
8848     SmallVector<SDValue, 8> Ops;
8849     for (SDValue Op : BV->op_values()) {
8850       // If the vector element type is not legal, the BUILD_VECTOR operands
8851       // are promoted and implicitly truncated.  Make that explicit here.
8852       if (Op.getValueType() != SrcEltVT)
8853         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
8854       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
8855       AddToWorklist(Ops.back().getNode());
8856     }
8857     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
8858   }
8859 
8860   // Otherwise, we're growing or shrinking the elements.  To avoid having to
8861   // handle annoying details of growing/shrinking FP values, we convert them to
8862   // int first.
8863   if (SrcEltVT.isFloatingPoint()) {
8864     // Convert the input float vector to a int vector where the elements are the
8865     // same sizes.
8866     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
8867     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
8868     SrcEltVT = IntVT;
8869   }
8870 
8871   // Now we know the input is an integer vector.  If the output is a FP type,
8872   // convert to integer first, then to FP of the right size.
8873   if (DstEltVT.isFloatingPoint()) {
8874     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
8875     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
8876 
8877     // Next, convert to FP elements of the same size.
8878     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
8879   }
8880 
8881   SDLoc DL(BV);
8882 
8883   // Okay, we know the src/dst types are both integers of differing types.
8884   // Handling growing first.
8885   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
8886   if (SrcBitSize < DstBitSize) {
8887     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
8888 
8889     SmallVector<SDValue, 8> Ops;
8890     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
8891          i += NumInputsPerOutput) {
8892       bool isLE = DAG.getDataLayout().isLittleEndian();
8893       APInt NewBits = APInt(DstBitSize, 0);
8894       bool EltIsUndef = true;
8895       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
8896         // Shift the previously computed bits over.
8897         NewBits <<= SrcBitSize;
8898         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
8899         if (Op.isUndef()) continue;
8900         EltIsUndef = false;
8901 
8902         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
8903                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
8904       }
8905 
8906       if (EltIsUndef)
8907         Ops.push_back(DAG.getUNDEF(DstEltVT));
8908       else
8909         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
8910     }
8911 
8912     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
8913     return DAG.getBuildVector(VT, DL, Ops);
8914   }
8915 
8916   // Finally, this must be the case where we are shrinking elements: each input
8917   // turns into multiple outputs.
8918   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
8919   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8920                             NumOutputsPerInput*BV->getNumOperands());
8921   SmallVector<SDValue, 8> Ops;
8922 
8923   for (const SDValue &Op : BV->op_values()) {
8924     if (Op.isUndef()) {
8925       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
8926       continue;
8927     }
8928 
8929     APInt OpVal = cast<ConstantSDNode>(Op)->
8930                   getAPIntValue().zextOrTrunc(SrcBitSize);
8931 
8932     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
8933       APInt ThisVal = OpVal.trunc(DstBitSize);
8934       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
8935       OpVal.lshrInPlace(DstBitSize);
8936     }
8937 
8938     // For big endian targets, swap the order of the pieces of each element.
8939     if (DAG.getDataLayout().isBigEndian())
8940       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
8941   }
8942 
8943   return DAG.getBuildVector(VT, DL, Ops);
8944 }
8945 
8946 static bool isContractable(SDNode *N) {
8947   SDNodeFlags F = N->getFlags();
8948   return F.hasAllowContract() || F.hasUnsafeAlgebra();
8949 }
8950 
8951 /// Try to perform FMA combining on a given FADD node.
8952 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
8953   SDValue N0 = N->getOperand(0);
8954   SDValue N1 = N->getOperand(1);
8955   EVT VT = N->getValueType(0);
8956   SDLoc SL(N);
8957 
8958   const TargetOptions &Options = DAG.getTarget().Options;
8959 
8960   // Floating-point multiply-add with intermediate rounding.
8961   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8962 
8963   // Floating-point multiply-add without intermediate rounding.
8964   bool HasFMA =
8965       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8966       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8967 
8968   // No valid opcode, do not combine.
8969   if (!HasFMAD && !HasFMA)
8970     return SDValue();
8971 
8972   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
8973                               Options.UnsafeFPMath || HasFMAD);
8974   // If the addition is not contractable, do not combine.
8975   if (!AllowFusionGlobally && !isContractable(N))
8976     return SDValue();
8977 
8978   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
8979   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
8980     return SDValue();
8981 
8982   // Always prefer FMAD to FMA for precision.
8983   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8984   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8985   bool LookThroughFPExt = TLI.isFPExtFree(VT);
8986 
8987   // Is the node an FMUL and contractable either due to global flags or
8988   // SDNodeFlags.
8989   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
8990     if (N.getOpcode() != ISD::FMUL)
8991       return false;
8992     return AllowFusionGlobally || isContractable(N.getNode());
8993   };
8994   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
8995   // prefer to fold the multiply with fewer uses.
8996   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
8997     if (N0.getNode()->use_size() > N1.getNode()->use_size())
8998       std::swap(N0, N1);
8999   }
9000 
9001   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
9002   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
9003     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9004                        N0.getOperand(0), N0.getOperand(1), N1);
9005   }
9006 
9007   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
9008   // Note: Commutes FADD operands.
9009   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
9010     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9011                        N1.getOperand(0), N1.getOperand(1), N0);
9012   }
9013 
9014   // Look through FP_EXTEND nodes to do more combining.
9015   if (LookThroughFPExt) {
9016     // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
9017     if (N0.getOpcode() == ISD::FP_EXTEND) {
9018       SDValue N00 = N0.getOperand(0);
9019       if (isContractableFMUL(N00))
9020         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9021                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9022                                        N00.getOperand(0)),
9023                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9024                                        N00.getOperand(1)), N1);
9025     }
9026 
9027     // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
9028     // Note: Commutes FADD operands.
9029     if (N1.getOpcode() == ISD::FP_EXTEND) {
9030       SDValue N10 = N1.getOperand(0);
9031       if (isContractableFMUL(N10))
9032         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9033                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9034                                        N10.getOperand(0)),
9035                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9036                                        N10.getOperand(1)), N0);
9037     }
9038   }
9039 
9040   // More folding opportunities when target permits.
9041   if (Aggressive) {
9042     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
9043     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9044     // are currently only supported on binary nodes.
9045     if (Options.UnsafeFPMath &&
9046         N0.getOpcode() == PreferredFusedOpcode &&
9047         N0.getOperand(2).getOpcode() == ISD::FMUL &&
9048         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
9049       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9050                          N0.getOperand(0), N0.getOperand(1),
9051                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9052                                      N0.getOperand(2).getOperand(0),
9053                                      N0.getOperand(2).getOperand(1),
9054                                      N1));
9055     }
9056 
9057     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
9058     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9059     // are currently only supported on binary nodes.
9060     if (Options.UnsafeFPMath &&
9061         N1->getOpcode() == PreferredFusedOpcode &&
9062         N1.getOperand(2).getOpcode() == ISD::FMUL &&
9063         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
9064       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9065                          N1.getOperand(0), N1.getOperand(1),
9066                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9067                                      N1.getOperand(2).getOperand(0),
9068                                      N1.getOperand(2).getOperand(1),
9069                                      N0));
9070     }
9071 
9072     if (LookThroughFPExt) {
9073       // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
9074       //   -> (fma x, y, (fma (fpext u), (fpext v), z))
9075       auto FoldFAddFMAFPExtFMul = [&] (
9076           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
9077         return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
9078                            DAG.getNode(PreferredFusedOpcode, SL, VT,
9079                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
9080                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
9081                                        Z));
9082       };
9083       if (N0.getOpcode() == PreferredFusedOpcode) {
9084         SDValue N02 = N0.getOperand(2);
9085         if (N02.getOpcode() == ISD::FP_EXTEND) {
9086           SDValue N020 = N02.getOperand(0);
9087           if (isContractableFMUL(N020))
9088             return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
9089                                         N020.getOperand(0), N020.getOperand(1),
9090                                         N1);
9091         }
9092       }
9093 
9094       // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
9095       //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
9096       // FIXME: This turns two single-precision and one double-precision
9097       // operation into two double-precision operations, which might not be
9098       // interesting for all targets, especially GPUs.
9099       auto FoldFAddFPExtFMAFMul = [&] (
9100           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
9101         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9102                            DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
9103                            DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
9104                            DAG.getNode(PreferredFusedOpcode, SL, VT,
9105                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
9106                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
9107                                        Z));
9108       };
9109       if (N0.getOpcode() == ISD::FP_EXTEND) {
9110         SDValue N00 = N0.getOperand(0);
9111         if (N00.getOpcode() == PreferredFusedOpcode) {
9112           SDValue N002 = N00.getOperand(2);
9113           if (isContractableFMUL(N002))
9114             return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
9115                                         N002.getOperand(0), N002.getOperand(1),
9116                                         N1);
9117         }
9118       }
9119 
9120       // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
9121       //   -> (fma y, z, (fma (fpext u), (fpext v), x))
9122       if (N1.getOpcode() == PreferredFusedOpcode) {
9123         SDValue N12 = N1.getOperand(2);
9124         if (N12.getOpcode() == ISD::FP_EXTEND) {
9125           SDValue N120 = N12.getOperand(0);
9126           if (isContractableFMUL(N120))
9127             return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
9128                                         N120.getOperand(0), N120.getOperand(1),
9129                                         N0);
9130         }
9131       }
9132 
9133       // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
9134       //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
9135       // FIXME: This turns two single-precision and one double-precision
9136       // operation into two double-precision operations, which might not be
9137       // interesting for all targets, especially GPUs.
9138       if (N1.getOpcode() == ISD::FP_EXTEND) {
9139         SDValue N10 = N1.getOperand(0);
9140         if (N10.getOpcode() == PreferredFusedOpcode) {
9141           SDValue N102 = N10.getOperand(2);
9142           if (isContractableFMUL(N102))
9143             return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
9144                                         N102.getOperand(0), N102.getOperand(1),
9145                                         N0);
9146         }
9147       }
9148     }
9149   }
9150 
9151   return SDValue();
9152 }
9153 
9154 /// Try to perform FMA combining on a given FSUB node.
9155 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
9156   SDValue N0 = N->getOperand(0);
9157   SDValue N1 = N->getOperand(1);
9158   EVT VT = N->getValueType(0);
9159   SDLoc SL(N);
9160 
9161   const TargetOptions &Options = DAG.getTarget().Options;
9162   // Floating-point multiply-add with intermediate rounding.
9163   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9164 
9165   // Floating-point multiply-add without intermediate rounding.
9166   bool HasFMA =
9167       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9168       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9169 
9170   // No valid opcode, do not combine.
9171   if (!HasFMAD && !HasFMA)
9172     return SDValue();
9173 
9174   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
9175                               Options.UnsafeFPMath || HasFMAD);
9176   // If the subtraction is not contractable, do not combine.
9177   if (!AllowFusionGlobally && !isContractable(N))
9178     return SDValue();
9179 
9180   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
9181   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
9182     return SDValue();
9183 
9184   // Always prefer FMAD to FMA for precision.
9185   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9186   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9187   bool LookThroughFPExt = TLI.isFPExtFree(VT);
9188 
9189   // Is the node an FMUL and contractable either due to global flags or
9190   // SDNodeFlags.
9191   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
9192     if (N.getOpcode() != ISD::FMUL)
9193       return false;
9194     return AllowFusionGlobally || isContractable(N.getNode());
9195   };
9196 
9197   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
9198   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
9199     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9200                        N0.getOperand(0), N0.getOperand(1),
9201                        DAG.getNode(ISD::FNEG, SL, VT, N1));
9202   }
9203 
9204   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
9205   // Note: Commutes FSUB operands.
9206   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
9207     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9208                        DAG.getNode(ISD::FNEG, SL, VT,
9209                                    N1.getOperand(0)),
9210                        N1.getOperand(1), N0);
9211 
9212   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
9213   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
9214       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
9215     SDValue N00 = N0.getOperand(0).getOperand(0);
9216     SDValue N01 = N0.getOperand(0).getOperand(1);
9217     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9218                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
9219                        DAG.getNode(ISD::FNEG, SL, VT, N1));
9220   }
9221 
9222   // Look through FP_EXTEND nodes to do more combining.
9223   if (LookThroughFPExt) {
9224     // fold (fsub (fpext (fmul x, y)), z)
9225     //   -> (fma (fpext x), (fpext y), (fneg z))
9226     if (N0.getOpcode() == ISD::FP_EXTEND) {
9227       SDValue N00 = N0.getOperand(0);
9228       if (isContractableFMUL(N00))
9229         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9230                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9231                                        N00.getOperand(0)),
9232                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9233                                        N00.getOperand(1)),
9234                            DAG.getNode(ISD::FNEG, SL, VT, N1));
9235     }
9236 
9237     // fold (fsub x, (fpext (fmul y, z)))
9238     //   -> (fma (fneg (fpext y)), (fpext z), x)
9239     // Note: Commutes FSUB operands.
9240     if (N1.getOpcode() == ISD::FP_EXTEND) {
9241       SDValue N10 = N1.getOperand(0);
9242       if (isContractableFMUL(N10))
9243         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9244                            DAG.getNode(ISD::FNEG, SL, VT,
9245                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
9246                                                    N10.getOperand(0))),
9247                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9248                                        N10.getOperand(1)),
9249                            N0);
9250     }
9251 
9252     // fold (fsub (fpext (fneg (fmul, x, y))), z)
9253     //   -> (fneg (fma (fpext x), (fpext y), z))
9254     // Note: This could be removed with appropriate canonicalization of the
9255     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9256     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9257     // from implementing the canonicalization in visitFSUB.
9258     if (N0.getOpcode() == ISD::FP_EXTEND) {
9259       SDValue N00 = N0.getOperand(0);
9260       if (N00.getOpcode() == ISD::FNEG) {
9261         SDValue N000 = N00.getOperand(0);
9262         if (isContractableFMUL(N000)) {
9263           return DAG.getNode(ISD::FNEG, SL, VT,
9264                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9265                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9266                                                      N000.getOperand(0)),
9267                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9268                                                      N000.getOperand(1)),
9269                                          N1));
9270         }
9271       }
9272     }
9273 
9274     // fold (fsub (fneg (fpext (fmul, x, y))), z)
9275     //   -> (fneg (fma (fpext x)), (fpext y), z)
9276     // Note: This could be removed with appropriate canonicalization of the
9277     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9278     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9279     // from implementing the canonicalization in visitFSUB.
9280     if (N0.getOpcode() == ISD::FNEG) {
9281       SDValue N00 = N0.getOperand(0);
9282       if (N00.getOpcode() == ISD::FP_EXTEND) {
9283         SDValue N000 = N00.getOperand(0);
9284         if (isContractableFMUL(N000)) {
9285           return DAG.getNode(ISD::FNEG, SL, VT,
9286                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9287                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9288                                                      N000.getOperand(0)),
9289                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9290                                                      N000.getOperand(1)),
9291                                          N1));
9292         }
9293       }
9294     }
9295 
9296   }
9297 
9298   // More folding opportunities when target permits.
9299   if (Aggressive) {
9300     // fold (fsub (fma x, y, (fmul u, v)), z)
9301     //   -> (fma x, y (fma u, v, (fneg z)))
9302     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9303     // are currently only supported on binary nodes.
9304     if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
9305         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
9306         N0.getOperand(2)->hasOneUse()) {
9307       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9308                          N0.getOperand(0), N0.getOperand(1),
9309                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9310                                      N0.getOperand(2).getOperand(0),
9311                                      N0.getOperand(2).getOperand(1),
9312                                      DAG.getNode(ISD::FNEG, SL, VT,
9313                                                  N1)));
9314     }
9315 
9316     // fold (fsub x, (fma y, z, (fmul u, v)))
9317     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
9318     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9319     // are currently only supported on binary nodes.
9320     if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
9321         isContractableFMUL(N1.getOperand(2))) {
9322       SDValue N20 = N1.getOperand(2).getOperand(0);
9323       SDValue N21 = N1.getOperand(2).getOperand(1);
9324       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9325                          DAG.getNode(ISD::FNEG, SL, VT,
9326                                      N1.getOperand(0)),
9327                          N1.getOperand(1),
9328                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9329                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
9330 
9331                                      N21, N0));
9332     }
9333 
9334     if (LookThroughFPExt) {
9335       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
9336       //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
9337       if (N0.getOpcode() == PreferredFusedOpcode) {
9338         SDValue N02 = N0.getOperand(2);
9339         if (N02.getOpcode() == ISD::FP_EXTEND) {
9340           SDValue N020 = N02.getOperand(0);
9341           if (isContractableFMUL(N020))
9342             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9343                                N0.getOperand(0), N0.getOperand(1),
9344                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9345                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9346                                                        N020.getOperand(0)),
9347                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9348                                                        N020.getOperand(1)),
9349                                            DAG.getNode(ISD::FNEG, SL, VT,
9350                                                        N1)));
9351         }
9352       }
9353 
9354       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
9355       //   -> (fma (fpext x), (fpext y),
9356       //           (fma (fpext u), (fpext v), (fneg z)))
9357       // FIXME: This turns two single-precision and one double-precision
9358       // operation into two double-precision operations, which might not be
9359       // interesting for all targets, especially GPUs.
9360       if (N0.getOpcode() == ISD::FP_EXTEND) {
9361         SDValue N00 = N0.getOperand(0);
9362         if (N00.getOpcode() == PreferredFusedOpcode) {
9363           SDValue N002 = N00.getOperand(2);
9364           if (isContractableFMUL(N002))
9365             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9366                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9367                                            N00.getOperand(0)),
9368                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9369                                            N00.getOperand(1)),
9370                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9371                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9372                                                        N002.getOperand(0)),
9373                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9374                                                        N002.getOperand(1)),
9375                                            DAG.getNode(ISD::FNEG, SL, VT,
9376                                                        N1)));
9377         }
9378       }
9379 
9380       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
9381       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
9382       if (N1.getOpcode() == PreferredFusedOpcode &&
9383         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
9384         SDValue N120 = N1.getOperand(2).getOperand(0);
9385         if (isContractableFMUL(N120)) {
9386           SDValue N1200 = N120.getOperand(0);
9387           SDValue N1201 = N120.getOperand(1);
9388           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9389                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
9390                              N1.getOperand(1),
9391                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9392                                          DAG.getNode(ISD::FNEG, SL, VT,
9393                                              DAG.getNode(ISD::FP_EXTEND, SL,
9394                                                          VT, N1200)),
9395                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9396                                                      N1201),
9397                                          N0));
9398         }
9399       }
9400 
9401       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
9402       //   -> (fma (fneg (fpext y)), (fpext z),
9403       //           (fma (fneg (fpext u)), (fpext v), x))
9404       // FIXME: This turns two single-precision and one double-precision
9405       // operation into two double-precision operations, which might not be
9406       // interesting for all targets, especially GPUs.
9407       if (N1.getOpcode() == ISD::FP_EXTEND &&
9408         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
9409         SDValue N100 = N1.getOperand(0).getOperand(0);
9410         SDValue N101 = N1.getOperand(0).getOperand(1);
9411         SDValue N102 = N1.getOperand(0).getOperand(2);
9412         if (isContractableFMUL(N102)) {
9413           SDValue N1020 = N102.getOperand(0);
9414           SDValue N1021 = N102.getOperand(1);
9415           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9416                              DAG.getNode(ISD::FNEG, SL, VT,
9417                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9418                                                      N100)),
9419                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
9420                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9421                                          DAG.getNode(ISD::FNEG, SL, VT,
9422                                              DAG.getNode(ISD::FP_EXTEND, SL,
9423                                                          VT, N1020)),
9424                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9425                                                      N1021),
9426                                          N0));
9427         }
9428       }
9429     }
9430   }
9431 
9432   return SDValue();
9433 }
9434 
9435 /// Try to perform FMA combining on a given FMUL node based on the distributive
9436 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
9437 /// subtraction instead of addition).
9438 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
9439   SDValue N0 = N->getOperand(0);
9440   SDValue N1 = N->getOperand(1);
9441   EVT VT = N->getValueType(0);
9442   SDLoc SL(N);
9443 
9444   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
9445 
9446   const TargetOptions &Options = DAG.getTarget().Options;
9447 
9448   // The transforms below are incorrect when x == 0 and y == inf, because the
9449   // intermediate multiplication produces a nan.
9450   if (!Options.NoInfsFPMath)
9451     return SDValue();
9452 
9453   // Floating-point multiply-add without intermediate rounding.
9454   bool HasFMA =
9455       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
9456       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9457       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9458 
9459   // Floating-point multiply-add with intermediate rounding. This can result
9460   // in a less precise result due to the changed rounding order.
9461   bool HasFMAD = Options.UnsafeFPMath &&
9462                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9463 
9464   // No valid opcode, do not combine.
9465   if (!HasFMAD && !HasFMA)
9466     return SDValue();
9467 
9468   // Always prefer FMAD to FMA for precision.
9469   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9470   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9471 
9472   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
9473   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
9474   auto FuseFADD = [&](SDValue X, SDValue Y) {
9475     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
9476       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9477       if (XC1 && XC1->isExactlyValue(+1.0))
9478         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9479       if (XC1 && XC1->isExactlyValue(-1.0))
9480         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9481                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9482     }
9483     return SDValue();
9484   };
9485 
9486   if (SDValue FMA = FuseFADD(N0, N1))
9487     return FMA;
9488   if (SDValue FMA = FuseFADD(N1, N0))
9489     return FMA;
9490 
9491   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
9492   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
9493   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
9494   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
9495   auto FuseFSUB = [&](SDValue X, SDValue Y) {
9496     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
9497       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
9498       if (XC0 && XC0->isExactlyValue(+1.0))
9499         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9500                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9501                            Y);
9502       if (XC0 && XC0->isExactlyValue(-1.0))
9503         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9504                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9505                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9506 
9507       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9508       if (XC1 && XC1->isExactlyValue(+1.0))
9509         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9510                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9511       if (XC1 && XC1->isExactlyValue(-1.0))
9512         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9513     }
9514     return SDValue();
9515   };
9516 
9517   if (SDValue FMA = FuseFSUB(N0, N1))
9518     return FMA;
9519   if (SDValue FMA = FuseFSUB(N1, N0))
9520     return FMA;
9521 
9522   return SDValue();
9523 }
9524 
9525 static bool isFMulNegTwo(SDValue &N) {
9526   if (N.getOpcode() != ISD::FMUL)
9527     return false;
9528   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
9529     return CFP->isExactlyValue(-2.0);
9530   return false;
9531 }
9532 
9533 SDValue DAGCombiner::visitFADD(SDNode *N) {
9534   SDValue N0 = N->getOperand(0);
9535   SDValue N1 = N->getOperand(1);
9536   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
9537   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
9538   EVT VT = N->getValueType(0);
9539   SDLoc DL(N);
9540   const TargetOptions &Options = DAG.getTarget().Options;
9541   const SDNodeFlags Flags = N->getFlags();
9542 
9543   // fold vector ops
9544   if (VT.isVector())
9545     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9546       return FoldedVOp;
9547 
9548   // fold (fadd c1, c2) -> c1 + c2
9549   if (N0CFP && N1CFP)
9550     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
9551 
9552   // canonicalize constant to RHS
9553   if (N0CFP && !N1CFP)
9554     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
9555 
9556   if (SDValue NewSel = foldBinOpIntoSelect(N))
9557     return NewSel;
9558 
9559   // fold (fadd A, (fneg B)) -> (fsub A, B)
9560   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9561       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
9562     return DAG.getNode(ISD::FSUB, DL, VT, N0,
9563                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9564 
9565   // fold (fadd (fneg A), B) -> (fsub B, A)
9566   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9567       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
9568     return DAG.getNode(ISD::FSUB, DL, VT, N1,
9569                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
9570 
9571   // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
9572   // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
9573   if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
9574       (isFMulNegTwo(N1) && N1.hasOneUse())) {
9575     bool N1IsFMul = isFMulNegTwo(N1);
9576     SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
9577     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
9578     return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
9579   }
9580 
9581   // FIXME: Auto-upgrade the target/function-level option.
9582   if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
9583     // fold (fadd A, 0) -> A
9584     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
9585       if (N1C->isZero())
9586         return N0;
9587   }
9588 
9589   // If 'unsafe math' is enabled, fold lots of things.
9590   if (Options.UnsafeFPMath) {
9591     // No FP constant should be created after legalization as Instruction
9592     // Selection pass has a hard time dealing with FP constants.
9593     bool AllowNewConst = (Level < AfterLegalizeDAG);
9594 
9595     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
9596     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
9597         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
9598       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
9599                          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
9600                                      Flags),
9601                          Flags);
9602 
9603     // If allowed, fold (fadd (fneg x), x) -> 0.0
9604     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
9605       return DAG.getConstantFP(0.0, DL, VT);
9606 
9607     // If allowed, fold (fadd x, (fneg x)) -> 0.0
9608     if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
9609       return DAG.getConstantFP(0.0, DL, VT);
9610 
9611     // We can fold chains of FADD's of the same value into multiplications.
9612     // This transform is not safe in general because we are reducing the number
9613     // of rounding steps.
9614     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
9615       if (N0.getOpcode() == ISD::FMUL) {
9616         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9617         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
9618 
9619         // (fadd (fmul x, c), x) -> (fmul x, c+1)
9620         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
9621           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9622                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9623           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
9624         }
9625 
9626         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
9627         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
9628             N1.getOperand(0) == N1.getOperand(1) &&
9629             N0.getOperand(0) == N1.getOperand(0)) {
9630           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9631                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9632           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
9633         }
9634       }
9635 
9636       if (N1.getOpcode() == ISD::FMUL) {
9637         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9638         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
9639 
9640         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
9641         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
9642           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9643                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9644           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
9645         }
9646 
9647         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
9648         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
9649             N0.getOperand(0) == N0.getOperand(1) &&
9650             N1.getOperand(0) == N0.getOperand(0)) {
9651           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9652                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9653           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
9654         }
9655       }
9656 
9657       if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
9658         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9659         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
9660         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
9661             (N0.getOperand(0) == N1)) {
9662           return DAG.getNode(ISD::FMUL, DL, VT,
9663                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
9664         }
9665       }
9666 
9667       if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
9668         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9669         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
9670         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
9671             N1.getOperand(0) == N0) {
9672           return DAG.getNode(ISD::FMUL, DL, VT,
9673                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
9674         }
9675       }
9676 
9677       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
9678       if (AllowNewConst &&
9679           N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
9680           N0.getOperand(0) == N0.getOperand(1) &&
9681           N1.getOperand(0) == N1.getOperand(1) &&
9682           N0.getOperand(0) == N1.getOperand(0)) {
9683         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
9684                            DAG.getConstantFP(4.0, DL, VT), Flags);
9685       }
9686     }
9687   } // enable-unsafe-fp-math
9688 
9689   // FADD -> FMA combines:
9690   if (SDValue Fused = visitFADDForFMACombine(N)) {
9691     AddToWorklist(Fused.getNode());
9692     return Fused;
9693   }
9694   return SDValue();
9695 }
9696 
9697 SDValue DAGCombiner::visitFSUB(SDNode *N) {
9698   SDValue N0 = N->getOperand(0);
9699   SDValue N1 = N->getOperand(1);
9700   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9701   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9702   EVT VT = N->getValueType(0);
9703   SDLoc DL(N);
9704   const TargetOptions &Options = DAG.getTarget().Options;
9705   const SDNodeFlags Flags = N->getFlags();
9706 
9707   // fold vector ops
9708   if (VT.isVector())
9709     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9710       return FoldedVOp;
9711 
9712   // fold (fsub c1, c2) -> c1-c2
9713   if (N0CFP && N1CFP)
9714     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
9715 
9716   if (SDValue NewSel = foldBinOpIntoSelect(N))
9717     return NewSel;
9718 
9719   // fold (fsub A, (fneg B)) -> (fadd A, B)
9720   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
9721     return DAG.getNode(ISD::FADD, DL, VT, N0,
9722                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9723 
9724   // FIXME: Auto-upgrade the target/function-level option.
9725   if (Options.NoSignedZerosFPMath  || N->getFlags().hasNoSignedZeros()) {
9726     // (fsub 0, B) -> -B
9727     if (N0CFP && N0CFP->isZero()) {
9728       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
9729         return GetNegatedExpression(N1, DAG, LegalOperations);
9730       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9731         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
9732     }
9733   }
9734 
9735   // If 'unsafe math' is enabled, fold lots of things.
9736   if (Options.UnsafeFPMath) {
9737     // (fsub A, 0) -> A
9738     if (N1CFP && N1CFP->isZero())
9739       return N0;
9740 
9741     // (fsub x, x) -> 0.0
9742     if (N0 == N1)
9743       return DAG.getConstantFP(0.0f, DL, VT);
9744 
9745     // (fsub x, (fadd x, y)) -> (fneg y)
9746     // (fsub x, (fadd y, x)) -> (fneg y)
9747     if (N1.getOpcode() == ISD::FADD) {
9748       SDValue N10 = N1->getOperand(0);
9749       SDValue N11 = N1->getOperand(1);
9750 
9751       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
9752         return GetNegatedExpression(N11, DAG, LegalOperations);
9753 
9754       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
9755         return GetNegatedExpression(N10, DAG, LegalOperations);
9756     }
9757   }
9758 
9759   // FSUB -> FMA combines:
9760   if (SDValue Fused = visitFSUBForFMACombine(N)) {
9761     AddToWorklist(Fused.getNode());
9762     return Fused;
9763   }
9764 
9765   return SDValue();
9766 }
9767 
/// Try to simplify an FMUL node.
///
/// Folds are attempted in order: vector simplification, constant folding,
/// moving a constant operand to the RHS, multiplication by 1.0,
/// foldBinOpIntoSelect, reassociations that are only done under unsafe FP
/// math, multiplication by 2.0 and by -1.0, cancelling negations on both
/// operands, recognizing a select-based sign multiply as FABS, and finally
/// the distributive FMUL -> FMA combine.
///
/// \returns the replacement value, or an empty SDValue if no fold applied.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Non-null only for a scalar FP constant or a constant splat.
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Folds in this block are gated on the global unsafe-FP-math option.
  if (Options.UnsafeFPMath) {
    // fold (fmul A, 0) -> 0
    if (N1CFP && N1CFP->isZero())
      return N1;

    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);

      // Check 1: Make sure that the first operand of the inner multiply is NOT
      // a constant. Otherwise, we may induce infinite looping.
      if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
        // Check 2: Make sure that the second operand of the inner multiply and
        // the second operand of the outer multiply are constants.
        if ((N1CFP && isConstOrConstSplatFP(N01)) ||
            (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
        }
      }
    }

    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
    // during an early run of DAGCombiner can prevent folding with fmuls
    // inserted during lowering.
    // NOTE(review): the condition does not itself require N1 to be a
    // constant, although the comment above describes it as 'c'.
    if (N0.getOpcode() == ISD::FADD &&
        (N0.getOperand(0) == N0.getOperand(1)) &&
        N0.hasOneUse()) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  // Requires the no-NaNs and no-signed-zeros flags on this node and a legal
  // FABS for the type.
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Arrange for 'Select' to be the SELECT operand and 'X' the other one.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // The condition must be a SETCC comparing X itself against the constant
    // 0.0, and both select arms must be scalar FP constants.
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        // A less-than style compare is handled as the greater-than form with
        // the two select arms exchanged.
        std::swap(TrueOpnd, FalseOpnd);
        // Fall through
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        // The negated form additionally needs a legal FNEG.
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                   DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    // Queue the fused node so that it gets visited in turn.
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
9919 
9920 SDValue DAGCombiner::visitFMA(SDNode *N) {
9921   SDValue N0 = N->getOperand(0);
9922   SDValue N1 = N->getOperand(1);
9923   SDValue N2 = N->getOperand(2);
9924   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9925   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9926   EVT VT = N->getValueType(0);
9927   SDLoc DL(N);
9928   const TargetOptions &Options = DAG.getTarget().Options;
9929 
9930   // Constant fold FMA.
9931   if (isa<ConstantFPSDNode>(N0) &&
9932       isa<ConstantFPSDNode>(N1) &&
9933       isa<ConstantFPSDNode>(N2)) {
9934     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
9935   }
9936 
9937   if (Options.UnsafeFPMath) {
9938     if (N0CFP && N0CFP->isZero())
9939       return N2;
9940     if (N1CFP && N1CFP->isZero())
9941       return N2;
9942   }
9943   // TODO: The FMA node should have flags that propagate to these nodes.
9944   if (N0CFP && N0CFP->isExactlyValue(1.0))
9945     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
9946   if (N1CFP && N1CFP->isExactlyValue(1.0))
9947     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
9948 
9949   // Canonicalize (fma c, x, y) -> (fma x, c, y)
9950   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9951      !isConstantFPBuildVectorOrConstantFP(N1))
9952     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
9953 
9954   // TODO: FMA nodes should have flags that propagate to the created nodes.
9955   // For now, create a Flags object for use with all unsafe math transforms.
9956   SDNodeFlags Flags;
9957   Flags.setUnsafeAlgebra(true);
9958 
9959   if (Options.UnsafeFPMath) {
9960     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
9961     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
9962         isConstantFPBuildVectorOrConstantFP(N1) &&
9963         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
9964       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9965                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
9966                                      Flags), Flags);
9967     }
9968 
9969     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
9970     if (N0.getOpcode() == ISD::FMUL &&
9971         isConstantFPBuildVectorOrConstantFP(N1) &&
9972         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
9973       return DAG.getNode(ISD::FMA, DL, VT,
9974                          N0.getOperand(0),
9975                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
9976                                      Flags),
9977                          N2);
9978     }
9979   }
9980 
9981   // (fma x, 1, y) -> (fadd x, y)
9982   // (fma x, -1, y) -> (fadd (fneg x), y)
9983   if (N1CFP) {
9984     if (N1CFP->isExactlyValue(1.0))
9985       // TODO: The FMA node should have flags that propagate to this node.
9986       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
9987 
9988     if (N1CFP->isExactlyValue(-1.0) &&
9989         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
9990       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
9991       AddToWorklist(RHSNeg.getNode());
9992       // TODO: The FMA node should have flags that propagate to this node.
9993       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
9994     }
9995   }
9996 
9997   if (Options.UnsafeFPMath) {
9998     // (fma x, c, x) -> (fmul x, (c+1))
9999     if (N1CFP && N0 == N2) {
10000       return DAG.getNode(ISD::FMUL, DL, VT, N0,
10001                          DAG.getNode(ISD::FADD, DL, VT, N1,
10002                                      DAG.getConstantFP(1.0, DL, VT), Flags),
10003                          Flags);
10004     }
10005 
10006     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
10007     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
10008       return DAG.getNode(ISD::FMUL, DL, VT, N0,
10009                          DAG.getNode(ISD::FADD, DL, VT, N1,
10010                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
10011                          Flags);
10012     }
10013   }
10014 
10015   return SDValue();
10016 }
10017 
10018 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
10019 // reciprocal.
10020 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
10021 // Notice that this is not always beneficial. One reason is different targets
10022 // may have different costs for FDIV and FMUL, so sometimes the cost of two
10023 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
10024 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
10025 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
10026   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
10027   const SDNodeFlags Flags = N->getFlags();
10028   if (!UnsafeMath && !Flags.hasAllowReciprocal())
10029     return SDValue();
10030 
10031   // Skip if current node is a reciprocal.
10032   SDValue N0 = N->getOperand(0);
10033   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10034   if (N0CFP && N0CFP->isExactlyValue(1.0))
10035     return SDValue();
10036 
10037   // Exit early if the target does not want this transform or if there can't
10038   // possibly be enough uses of the divisor to make the transform worthwhile.
10039   SDValue N1 = N->getOperand(1);
10040   unsigned MinUses = TLI.combineRepeatedFPDivisors();
10041   if (!MinUses || N1->use_size() < MinUses)
10042     return SDValue();
10043 
10044   // Find all FDIV users of the same divisor.
10045   // Use a set because duplicates may be present in the user list.
10046   SetVector<SDNode *> Users;
10047   for (auto *U : N1->uses()) {
10048     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
10049       // This division is eligible for optimization only if global unsafe math
10050       // is enabled or if this division allows reciprocal formation.
10051       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
10052         Users.insert(U);
10053     }
10054   }
10055 
10056   // Now that we have the actual number of divisor uses, make sure it meets
10057   // the minimum threshold specified by the target.
10058   if (Users.size() < MinUses)
10059     return SDValue();
10060 
10061   EVT VT = N->getValueType(0);
10062   SDLoc DL(N);
10063   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
10064   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
10065 
10066   // Dividend / Divisor -> Dividend * Reciprocal
10067   for (auto *U : Users) {
10068     SDValue Dividend = U->getOperand(0);
10069     if (Dividend != FPOne) {
10070       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
10071                                     Reciprocal, Flags);
10072       CombineTo(U, NewNode);
10073     } else if (U != Reciprocal.getNode()) {
10074       // In the absence of fast-math-flags, this user node is always the
10075       // same node as Reciprocal, but with FMF they may be different nodes.
10076       CombineTo(U, Reciprocal);
10077     }
10078   }
10079   return SDValue(N, 0);  // N was replaced.
10080 }
10081 
/// Try to simplify an FDIV node.
///
/// Folds are attempted in order: vector simplification, constant folding,
/// foldBinOpIntoSelect, a group of unsafe-FP-math rewrites that replace the
/// division by a multiplication (constant reciprocal, reciprocal square root
/// estimate, reciprocal estimate), cancelling negations on both operands, and
/// finally sharing one reciprocal between divisions by the same divisor.
///
/// \returns the replacement value, or an empty SDValue if no fold applied.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Only scalar FP constants are matched here (no splat handling).
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Everything in this block trades precision for speed and is gated on the
  // global unsafe-FP-math option.
  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      // Any division status other than exactly opOK or opInexact is rejected.
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // x / sqrt(z) -> x * rsqrt(z), if an estimate was produced.
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fp_extend(sqrt(z)) -> x * fp_extend(rsqrt(z))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fp_round(sqrt(z)) -> x * fp_round(rsqrt(z)); the second operand
      // of the original FP_ROUND is reused for the new one.
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      // Either operand of the multiply may be the FSQRT; the first one found
      // wins and SqrtOp stays empty when neither is.
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // Last, try to share one reciprocal between all divisions by N1.
  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
    return CombineRepeatedDivisors;

  return SDValue();
}
10194 
10195 SDValue DAGCombiner::visitFREM(SDNode *N) {
10196   SDValue N0 = N->getOperand(0);
10197   SDValue N1 = N->getOperand(1);
10198   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10199   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10200   EVT VT = N->getValueType(0);
10201 
10202   // fold (frem c1, c2) -> fmod(c1,c2)
10203   if (N0CFP && N1CFP)
10204     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
10205 
10206   if (SDValue NewSel = foldBinOpIntoSelect(N))
10207     return NewSel;
10208 
10209   return SDValue();
10210 }
10211 
10212 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
10213   if (!DAG.getTarget().Options.UnsafeFPMath)
10214     return SDValue();
10215 
10216   SDValue N0 = N->getOperand(0);
10217   if (TLI.isFsqrtCheap(N0, DAG))
10218     return SDValue();
10219 
10220   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
10221   // For now, create a Flags object for use with all unsafe math transforms.
10222   SDNodeFlags Flags;
10223   Flags.setUnsafeAlgebra(true);
10224   return buildSqrtEstimate(N0, Flags);
10225 }
10226 
10227 /// copysign(x, fp_extend(y)) -> copysign(x, y)
10228 /// copysign(x, fp_round(y)) -> copysign(x, y)
10229 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
10230   SDValue N1 = N->getOperand(1);
10231   if ((N1.getOpcode() == ISD::FP_EXTEND ||
10232        N1.getOpcode() == ISD::FP_ROUND)) {
10233     // Do not optimize out type conversion of f128 type yet.
10234     // For some targets like x86_64, configuration is changed to keep one f128
10235     // value in one SSE register, but instruction selection cannot handle
10236     // FCOPYSIGN on SSE registers yet.
10237     EVT N1VT = N1->getValueType(0);
10238     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
10239     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
10240   }
10241   return false;
10242 }
10243 
10244 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
10245   SDValue N0 = N->getOperand(0);
10246   SDValue N1 = N->getOperand(1);
10247   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10248   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10249   EVT VT = N->getValueType(0);
10250 
10251   if (N0CFP && N1CFP) // Constant fold
10252     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
10253 
10254   if (N1CFP) {
10255     const APFloat &V = N1CFP->getValueAPF();
10256     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
10257     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
10258     if (!V.isNegative()) {
10259       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
10260         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10261     } else {
10262       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10263         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
10264                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
10265     }
10266   }
10267 
10268   // copysign(fabs(x), y) -> copysign(x, y)
10269   // copysign(fneg(x), y) -> copysign(x, y)
10270   // copysign(copysign(x,z), y) -> copysign(x, y)
10271   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
10272       N0.getOpcode() == ISD::FCOPYSIGN)
10273     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
10274 
10275   // copysign(x, abs(y)) -> abs(x)
10276   if (N1.getOpcode() == ISD::FABS)
10277     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10278 
10279   // copysign(x, copysign(y,z)) -> copysign(x, z)
10280   if (N1.getOpcode() == ISD::FCOPYSIGN)
10281     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
10282 
10283   // copysign(x, fp_extend(y)) -> copysign(x, y)
10284   // copysign(x, fp_round(y)) -> copysign(x, y)
10285   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
10286     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
10287 
10288   return SDValue();
10289 }
10290 
10291 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
10292   SDValue N0 = N->getOperand(0);
10293   EVT VT = N->getValueType(0);
10294   EVT OpVT = N0.getValueType();
10295 
10296   // fold (sint_to_fp c1) -> c1fp
10297   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10298       // ...but only if the target supports immediate floating-point values
10299       (!LegalOperations ||
10300        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
10301     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10302 
10303   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
10304   // but UINT_TO_FP is legal on this target, try to convert.
10305   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
10306       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
10307     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
10308     if (DAG.SignBitIsZero(N0))
10309       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10310   }
10311 
10312   // The next optimizations are desirable only if SELECT_CC can be lowered.
10313   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10314     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10315     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
10316         !VT.isVector() &&
10317         (!LegalOperations ||
10318          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10319       SDLoc DL(N);
10320       SDValue Ops[] =
10321         { N0.getOperand(0), N0.getOperand(1),
10322           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10323           N0.getOperand(2) };
10324       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10325     }
10326 
10327     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
10328     //      (select_cc x, y, 1.0, 0.0,, cc)
10329     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
10330         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
10331         (!LegalOperations ||
10332          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10333       SDLoc DL(N);
10334       SDValue Ops[] =
10335         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
10336           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10337           N0.getOperand(0).getOperand(2) };
10338       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10339     }
10340   }
10341 
10342   return SDValue();
10343 }
10344 
10345 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
10346   SDValue N0 = N->getOperand(0);
10347   EVT VT = N->getValueType(0);
10348   EVT OpVT = N0.getValueType();
10349 
10350   // fold (uint_to_fp c1) -> c1fp
10351   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10352       // ...but only if the target supports immediate floating-point values
10353       (!LegalOperations ||
10354        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
10355     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10356 
10357   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
10358   // but SINT_TO_FP is legal on this target, try to convert.
10359   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
10360       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
10361     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
10362     if (DAG.SignBitIsZero(N0))
10363       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10364   }
10365 
10366   // The next optimizations are desirable only if SELECT_CC can be lowered.
10367   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10368     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10369 
10370     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
10371         (!LegalOperations ||
10372          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10373       SDLoc DL(N);
10374       SDValue Ops[] =
10375         { N0.getOperand(0), N0.getOperand(1),
10376           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10377           N0.getOperand(2) };
10378       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10379     }
10380   }
10381 
10382   return SDValue();
10383 }
10384 
10385 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
10386 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
10387   SDValue N0 = N->getOperand(0);
10388   EVT VT = N->getValueType(0);
10389 
10390   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
10391     return SDValue();
10392 
10393   SDValue Src = N0.getOperand(0);
10394   EVT SrcVT = Src.getValueType();
10395   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
10396   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
10397 
10398   // We can safely assume the conversion won't overflow the output range,
10399   // because (for example) (uint8_t)18293.f is undefined behavior.
10400 
10401   // Since we can assume the conversion won't overflow, our decision as to
10402   // whether the input will fit in the float should depend on the minimum
10403   // of the input range and output range.
10404 
10405   // This means this is also safe for a signed input and unsigned output, since
10406   // a negative input would lead to undefined behavior.
10407   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
10408   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
10409   unsigned ActualSize = std::min(InputSize, OutputSize);
10410   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
10411 
10412   // We can only fold away the float conversion if the input range can be
10413   // represented exactly in the float range.
10414   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
10415     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
10416       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
10417                                                        : ISD::ZERO_EXTEND;
10418       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
10419     }
10420     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
10421       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
10422     return DAG.getBitcast(VT, Src);
10423   }
10424   return SDValue();
10425 }
10426 
10427 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
10428   SDValue N0 = N->getOperand(0);
10429   EVT VT = N->getValueType(0);
10430 
10431   // fold (fp_to_sint c1fp) -> c1
10432   if (isConstantFPBuildVectorOrConstantFP(N0))
10433     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
10434 
10435   return FoldIntToFPToInt(N, DAG);
10436 }
10437 
10438 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
10439   SDValue N0 = N->getOperand(0);
10440   EVT VT = N->getValueType(0);
10441 
10442   // fold (fp_to_uint c1fp) -> c1
10443   if (isConstantFPBuildVectorOrConstantFP(N0))
10444     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
10445 
10446   return FoldIntToFPToInt(N, DAG);
10447 }
10448 
10449 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
10450   SDValue N0 = N->getOperand(0);
10451   SDValue N1 = N->getOperand(1);
10452   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10453   EVT VT = N->getValueType(0);
10454 
10455   // fold (fp_round c1fp) -> c1fp
10456   if (N0CFP)
10457     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
10458 
10459   // fold (fp_round (fp_extend x)) -> x
10460   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
10461     return N0.getOperand(0);
10462 
10463   // fold (fp_round (fp_round x)) -> (fp_round x)
10464   if (N0.getOpcode() == ISD::FP_ROUND) {
10465     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
10466     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
10467 
10468     // Skip this folding if it results in an fp_round from f80 to f16.
10469     //
10470     // f80 to f16 always generates an expensive (and as yet, unimplemented)
10471     // libcall to __truncxfhf2 instead of selecting native f16 conversion
10472     // instructions from f32 or f64.  Moreover, the first (value-preserving)
10473     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
10474     // x86.
10475     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
10476       return SDValue();
10477 
10478     // If the first fp_round isn't a value preserving truncation, it might
10479     // introduce a tie in the second fp_round, that wouldn't occur in the
10480     // single-step fp_round we want to fold to.
10481     // In other words, double rounding isn't the same as rounding.
10482     // Also, this is a value preserving truncation iff both fp_round's are.
10483     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
10484       SDLoc DL(N);
10485       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
10486                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
10487     }
10488   }
10489 
10490   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
10491   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
10492     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
10493                               N0.getOperand(0), N1);
10494     AddToWorklist(Tmp.getNode());
10495     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
10496                        Tmp, N0.getOperand(1));
10497   }
10498 
10499   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10500     return NewVSel;
10501 
10502   return SDValue();
10503 }
10504 
10505 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
10506   SDValue N0 = N->getOperand(0);
10507   EVT VT = N->getValueType(0);
10508   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10509   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10510 
10511   // fold (fp_round_inreg c1fp) -> c1fp
10512   if (N0CFP && isTypeLegal(EVT)) {
10513     SDLoc DL(N);
10514     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
10515     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
10516   }
10517 
10518   return SDValue();
10519 }
10520 
10521 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
10522   SDValue N0 = N->getOperand(0);
10523   EVT VT = N->getValueType(0);
10524 
10525   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
10526   if (N->hasOneUse() &&
10527       N->use_begin()->getOpcode() == ISD::FP_ROUND)
10528     return SDValue();
10529 
10530   // fold (fp_extend c1fp) -> c1fp
10531   if (isConstantFPBuildVectorOrConstantFP(N0))
10532     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
10533 
10534   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
10535   if (N0.getOpcode() == ISD::FP16_TO_FP &&
10536       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
10537     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
10538 
10539   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
10540   // value of X.
10541   if (N0.getOpcode() == ISD::FP_ROUND
10542       && N0.getConstantOperandVal(1) == 1) {
10543     SDValue In = N0.getOperand(0);
10544     if (In.getValueType() == VT) return In;
10545     if (VT.bitsLT(In.getValueType()))
10546       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
10547                          In, N0.getOperand(1));
10548     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
10549   }
10550 
10551   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
10552   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10553        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10554     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10555     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10556                                      LN0->getChain(),
10557                                      LN0->getBasePtr(), N0.getValueType(),
10558                                      LN0->getMemOperand());
10559     CombineTo(N, ExtLoad);
10560     CombineTo(N0.getNode(),
10561               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
10562                           N0.getValueType(), ExtLoad,
10563                           DAG.getIntPtrConstant(1, SDLoc(N0))),
10564               ExtLoad.getValue(1));
10565     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10566   }
10567 
10568   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10569     return NewVSel;
10570 
10571   return SDValue();
10572 }
10573 
10574 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
10575   SDValue N0 = N->getOperand(0);
10576   EVT VT = N->getValueType(0);
10577 
10578   // fold (fceil c1) -> fceil(c1)
10579   if (isConstantFPBuildVectorOrConstantFP(N0))
10580     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
10581 
10582   return SDValue();
10583 }
10584 
10585 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
10586   SDValue N0 = N->getOperand(0);
10587   EVT VT = N->getValueType(0);
10588 
10589   // fold (ftrunc c1) -> ftrunc(c1)
10590   if (isConstantFPBuildVectorOrConstantFP(N0))
10591     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
10592 
10593   return SDValue();
10594 }
10595 
10596 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
10597   SDValue N0 = N->getOperand(0);
10598   EVT VT = N->getValueType(0);
10599 
10600   // fold (ffloor c1) -> ffloor(c1)
10601   if (isConstantFPBuildVectorOrConstantFP(N0))
10602     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
10603 
10604   return SDValue();
10605 }
10606 
10607 // FIXME: FNEG and FABS have a lot in common; refactor.
10608 SDValue DAGCombiner::visitFNEG(SDNode *N) {
10609   SDValue N0 = N->getOperand(0);
10610   EVT VT = N->getValueType(0);
10611 
10612   // Constant fold FNEG.
10613   if (isConstantFPBuildVectorOrConstantFP(N0))
10614     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
10615 
10616   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
10617                          &DAG.getTarget().Options))
10618     return GetNegatedExpression(N0, DAG, LegalOperations);
10619 
10620   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
10621   // constant pool values.
10622   if (!TLI.isFNegFree(VT) &&
10623       N0.getOpcode() == ISD::BITCAST &&
10624       N0.getNode()->hasOneUse()) {
10625     SDValue Int = N0.getOperand(0);
10626     EVT IntVT = Int.getValueType();
10627     if (IntVT.isInteger() && !IntVT.isVector()) {
10628       APInt SignMask;
10629       if (N0.getValueType().isVector()) {
10630         // For a vector, get a mask such as 0x80... per scalar element
10631         // and splat it.
10632         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
10633         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10634       } else {
10635         // For a scalar, just generate 0x80...
10636         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
10637       }
10638       SDLoc DL0(N0);
10639       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
10640                         DAG.getConstant(SignMask, DL0, IntVT));
10641       AddToWorklist(Int.getNode());
10642       return DAG.getBitcast(VT, Int);
10643     }
10644   }
10645 
10646   // (fneg (fmul c, x)) -> (fmul -c, x)
10647   if (N0.getOpcode() == ISD::FMUL &&
10648       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
10649     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
10650     if (CFP1) {
10651       APFloat CVal = CFP1->getValueAPF();
10652       CVal.changeSign();
10653       if (Level >= AfterLegalizeDAG &&
10654           (TLI.isFPImmLegal(CVal, VT) ||
10655            TLI.isOperationLegal(ISD::ConstantFP, VT)))
10656         return DAG.getNode(
10657             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
10658             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
10659             N0->getFlags());
10660     }
10661   }
10662 
10663   return SDValue();
10664 }
10665 
10666 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
10667   SDValue N0 = N->getOperand(0);
10668   SDValue N1 = N->getOperand(1);
10669   EVT VT = N->getValueType(0);
10670   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10671   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10672 
10673   if (N0CFP && N1CFP) {
10674     const APFloat &C0 = N0CFP->getValueAPF();
10675     const APFloat &C1 = N1CFP->getValueAPF();
10676     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
10677   }
10678 
10679   // Canonicalize to constant on RHS.
10680   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10681      !isConstantFPBuildVectorOrConstantFP(N1))
10682     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
10683 
10684   return SDValue();
10685 }
10686 
10687 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
10688   SDValue N0 = N->getOperand(0);
10689   SDValue N1 = N->getOperand(1);
10690   EVT VT = N->getValueType(0);
10691   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10692   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10693 
10694   if (N0CFP && N1CFP) {
10695     const APFloat &C0 = N0CFP->getValueAPF();
10696     const APFloat &C1 = N1CFP->getValueAPF();
10697     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
10698   }
10699 
10700   // Canonicalize to constant on RHS.
10701   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10702      !isConstantFPBuildVectorOrConstantFP(N1))
10703     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
10704 
10705   return SDValue();
10706 }
10707 
10708 SDValue DAGCombiner::visitFABS(SDNode *N) {
10709   SDValue N0 = N->getOperand(0);
10710   EVT VT = N->getValueType(0);
10711 
10712   // fold (fabs c1) -> fabs(c1)
10713   if (isConstantFPBuildVectorOrConstantFP(N0))
10714     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10715 
10716   // fold (fabs (fabs x)) -> (fabs x)
10717   if (N0.getOpcode() == ISD::FABS)
10718     return N->getOperand(0);
10719 
10720   // fold (fabs (fneg x)) -> (fabs x)
10721   // fold (fabs (fcopysign x, y)) -> (fabs x)
10722   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
10723     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
10724 
10725   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
10726   // constant pool values.
10727   if (!TLI.isFAbsFree(VT) &&
10728       N0.getOpcode() == ISD::BITCAST &&
10729       N0.getNode()->hasOneUse()) {
10730     SDValue Int = N0.getOperand(0);
10731     EVT IntVT = Int.getValueType();
10732     if (IntVT.isInteger() && !IntVT.isVector()) {
10733       APInt SignMask;
10734       if (N0.getValueType().isVector()) {
10735         // For a vector, get a mask such as 0x7f... per scalar element
10736         // and splat it.
10737         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
10738         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10739       } else {
10740         // For a scalar, just generate 0x7f...
10741         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
10742       }
10743       SDLoc DL(N0);
10744       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
10745                         DAG.getConstant(SignMask, DL, IntVT));
10746       AddToWorklist(Int.getNode());
10747       return DAG.getBitcast(N->getValueType(0), Int);
10748     }
10749   }
10750 
10751   return SDValue();
10752 }
10753 
/// Simplify a BRCOND node. Operand 0 is the chain, operand 1 the branch
/// condition and operand 2 the value forwarded as the last operand of any
/// replacement branch.
///
/// Three rewrites are attempted, in order:
///   1. brcond(setcc) -> br_cc, when BR_CC is legal or custom for the compared
///      type.
///   2. brcond(srl(and x, pow2), log2(pow2)), optionally through a truncate,
///      -> brcond(setcc ne (and x, pow2), 0).
///   3. brcond(xor(x, y)) -> brcond(setcc ne x, y).
///
/// Returns the replacement node, SDValue(N, 0) when N was already updated in
/// place, or an empty SDValue when nothing changed.
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  // The condition is a single-use SRL, or a single-use TRUNCATE of a
  // single-use SRL.
  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
       (N1.getOperand(0).hasOneUse() &&
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
    SDNode *Trunc = nullptr;
    if (N1.getOpcode() == ISD::TRUNCATE) {
      // Look past the truncate.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc ne %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDLoc DL(N);
          // The shifted value is non-zero exactly when the masked bit is set,
          // hence the SETNE comparison against zero.
          SDValue SetCC =
            DAG.getSetCC(DL,
                         getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc)
            deleteAndRecombine(Trunc);
          // Replace the uses of SRL with SETCC
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
          deleteAndRecombine(N1.getNode());
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }

    if (Trunc)
      // Restore N1 if the above transformation doesn't match.
      N1 = N->getOperand(1);
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      if (SDValue Tmp = visitXOR(TheXor)) {
        if (Tmp.getNode() != TheXor) {
          DEBUG(dbgs() << "\nReplacing.8 ";
                TheXor->dump(&DAG);
                dbgs() << "\nWith: ";
                Tmp.getNode()->dump(&DAG);
                dbgs() << '\n');
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
          deleteAndRecombine(TheXor);
          return DAG.getNode(ISD::BRCOND, SDLoc(N),
                             MVT::Other, Chain, Tmp, N2);
        }

        // visitXOR has changed XOR's operands or replaced the XOR completely,
        // bail out.
        return SDValue(N, 0);
      }
    }

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // NOTE(review): this tests Op0 for being both the constant one and an
      // XOR node. If isOneConstant() only matches constant nodes, the two
      // conditions cannot hold together, so Equal would never become true and
      // the "x == y" form described above would never be produced — confirm.
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
          Op0.getOpcode() == ISD::XOR) {
        TheXor = Op0.getNode();
        Equal = true;
      }

      EVT SetCCVT = N1.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
      deleteAndRecombine(N1.getNode());
      return DAG.getNode(ISD::BRCOND, SDLoc(N),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}
10898 
10899 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
10900 //
10901 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
10902   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
10903   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
10904 
10905   // If N is a constant we could fold this into a fallthrough or unconditional
10906   // branch. However that doesn't happen very often in normal code, because
10907   // Instcombine/SimplifyCFG should have handled the available opportunities.
10908   // If we did this folding here, it would be necessary to update the
10909   // MachineBasicBlock CFG, which is awkward.
10910 
10911   // Use SimplifySetCC to simplify SETCC's.
10912   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
10913                                CondLHS, CondRHS, CC->get(), SDLoc(N),
10914                                false);
10915   if (Simp.getNode()) AddToWorklist(Simp.getNode());
10916 
10917   // fold to a simpler setcc
10918   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
10919     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
10920                        N->getOperand(0), Simp.getOperand(2),
10921                        Simp.getOperand(0), Simp.getOperand(1),
10922                        N->getOperand(4));
10923 
10924   return SDValue();
10925 }
10926 
10927 /// Return true if 'Use' is a load or a store that uses N as its base pointer
10928 /// and that N may be folded in the load / store addressing mode.
10929 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
10930                                     SelectionDAG &DAG,
10931                                     const TargetLowering &TLI) {
10932   EVT VT;
10933   unsigned AS;
10934 
10935   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
10936     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
10937       return false;
10938     VT = LD->getMemoryVT();
10939     AS = LD->getAddressSpace();
10940   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
10941     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
10942       return false;
10943     VT = ST->getMemoryVT();
10944     AS = ST->getAddressSpace();
10945   } else
10946     return false;
10947 
10948   TargetLowering::AddrMode AM;
10949   if (N->getOpcode() == ISD::ADD) {
10950     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
10951     if (Offset)
10952       // [reg +/- imm]
10953       AM.BaseOffs = Offset->getSExtValue();
10954     else
10955       // [reg +/- reg]
10956       AM.Scale = 1;
10957   } else if (N->getOpcode() == ISD::SUB) {
10958     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
10959     if (Offset)
10960       // [reg +/- imm]
10961       AM.BaseOffs = -Offset->getSExtValue();
10962     else
10963       // [reg +/- reg]
10964       AM.Scale = 1;
10965   } else
10966     return false;
10967 
10968   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
10969                                    VT.getTypeForEVT(*DAG.getContext()), AS);
10970 }
10971 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
///
/// \param N the load or store to transform. Any other node kind, and any
///          load/store that is already indexed, is rejected.
/// \returns true if N was replaced by an indexed load/store (N and its old
///          address node are then passed to deleteAndRecombine); false if
///          the DAG was left unchanged.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Only attempted once the combiner level has reached AfterLegalizeDAG.
  if (Level < AfterLegalizeDAG)
    return false;

  // Classify N and make sure the target supports at least one pre-indexed
  // form (PRE_INC or PRE_DEC) for the memory type being accessed.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // While Swapped is set, BasePtr/Offset hold the target's Offset/BasePtr
  // respectively; the swap is undone before the indexed node is created.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  // RegisterSDNode bases are rejected here as well.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  // The worklist is seeded with N, and both containers are shared by every
  // hasPredecessorHelper call below.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  // OtherUses ends up empty unless every remaining user examined is an
  // add/sub of BasePtr with a constant of the same type as Offset.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // NOTE(review): presumably true when this user is a predecessor of N
      // (the worklist seed); such users are left alone -- confirm against
      // SDNode::hasPredecessorHelper.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Any user that is not an add/sub abandons the whole rewrite of the
      // other uses (the pre-index transformation itself may still proceed).
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // (OperandNo + 1) & 1 selects the operand of the binary user other
      // than the one through which BasePtr is used.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Undo the earlier swap so that BasePtr/Offset are again in the order the
  // target returned them, which is the order the indexed node is built with.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  // Every other user could fold Ptr into its own addressing mode (#4).
  if (!RealUse)
    return false;

  // All checks passed: build the indexed node. No DAG mutation has happened
  // before this point.
  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Result numbering as used below: an indexed load's values 0 and 2 replace
  // N's values 0 and 1, and its value 1 replaces Ptr; an indexed store's
  // value 1 replaces N's value 0, and its value 0 replaces Ptr.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Re-apply the swap so BasePtr/Offset match the view that was in effect
  // while OtherUses was collected (Offset is the constant there).
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    // Work out which operand of the user is the constant (OffsetIdx) and
    // which one is BasePtr (the other one).
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    // Signs of the terms: a SUB negates whichever operand sits in position 1;
    // PRE_DEC negates the offset, or the pointer when the parts were swapped.
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    // (y0 * y1) decides whether t1 is added to or subtracted from the new
    // constant.
    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    // CNV = x0 * offset0 - x1 * y0 * y1 * offset1
    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}
11199 
11200 /// Try to combine a load/store with a add/sub of the base pointer node into a
11201 /// post-indexed load/store. The transformation folded the add/subtract into the
11202 /// new indexed load/store effectively and all of its uses are redirected to the
11203 /// new load/store.
11204 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
11205   if (Level < AfterLegalizeDAG)
11206     return false;
11207 
11208   bool isLoad = true;
11209   SDValue Ptr;
11210   EVT VT;
11211   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
11212     if (LD->isIndexed())
11213       return false;
11214     VT = LD->getMemoryVT();
11215     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
11216         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
11217       return false;
11218     Ptr = LD->getBasePtr();
11219   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
11220     if (ST->isIndexed())
11221       return false;
11222     VT = ST->getMemoryVT();
11223     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
11224         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
11225       return false;
11226     Ptr = ST->getBasePtr();
11227     isLoad = false;
11228   } else {
11229     return false;
11230   }
11231 
11232   if (Ptr.getNode()->hasOneUse())
11233     return false;
11234 
11235   for (SDNode *Op : Ptr.getNode()->uses()) {
11236     if (Op == N ||
11237         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
11238       continue;
11239 
11240     SDValue BasePtr;
11241     SDValue Offset;
11242     ISD::MemIndexedMode AM = ISD::UNINDEXED;
11243     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
11244       // Don't create a indexed load / store with zero offset.
11245       if (isNullConstant(Offset))
11246         continue;
11247 
11248       // Try turning it into a post-indexed load / store except when
11249       // 1) All uses are load / store ops that use it as base ptr (and
11250       //    it may be folded as addressing mmode).
11251       // 2) Op must be independent of N, i.e. Op is neither a predecessor
11252       //    nor a successor of N. Otherwise, if Op is folded that would
11253       //    create a cycle.
11254 
11255       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
11256         continue;
11257 
11258       // Check for #1.
11259       bool TryNext = false;
11260       for (SDNode *Use : BasePtr.getNode()->uses()) {
11261         if (Use == Ptr.getNode())
11262           continue;
11263 
11264         // If all the uses are load / store addresses, then don't do the
11265         // transformation.
11266         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
11267           bool RealUse = false;
11268           for (SDNode *UseUse : Use->uses()) {
11269             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
11270               RealUse = true;
11271           }
11272 
11273           if (!RealUse) {
11274             TryNext = true;
11275             break;
11276           }
11277         }
11278       }
11279 
11280       if (TryNext)
11281         continue;
11282 
11283       // Check for #2
11284       if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
11285         SDValue Result = isLoad
11286           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
11287                                BasePtr, Offset, AM)
11288           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
11289                                 BasePtr, Offset, AM);
11290         ++PostIndexedNodes;
11291         ++NodesCombined;
11292         DEBUG(dbgs() << "\nReplacing.5 ";
11293               N->dump(&DAG);
11294               dbgs() << "\nWith: ";
11295               Result.getNode()->dump(&DAG);
11296               dbgs() << '\n');
11297         WorklistRemover DeadNodes(*this);
11298         if (isLoad) {
11299           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
11300           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
11301         } else {
11302           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
11303         }
11304 
11305         // Finally, since the node is now dead, remove it from the graph.
11306         deleteAndRecombine(N);
11307 
11308         // Replace the uses of Use with uses of the updated base value.
11309         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
11310                                       Result.getValue(isLoad ? 1 : 0));
11311         deleteAndRecombine(Op);
11312         return true;
11313       }
11314     }
11315   }
11316 
11317   return false;
11318 }
11319 
11320 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
11321 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
11322   ISD::MemIndexedMode AM = LD->getAddressingMode();
11323   assert(AM != ISD::UNINDEXED);
11324   SDValue BP = LD->getOperand(1);
11325   SDValue Inc = LD->getOperand(2);
11326 
11327   // Some backends use TargetConstants for load offsets, but don't expect
11328   // TargetConstants in general ADD nodes. We can convert these constants into
11329   // regular Constants (if the constant is not opaque).
11330   assert((Inc.getOpcode() != ISD::TargetConstant ||
11331           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
11332          "Cannot split out indexing using opaque target constants");
11333   if (Inc.getOpcode() == ISD::TargetConstant) {
11334     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
11335     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
11336                           ConstInc->getValueType(0));
11337   }
11338 
11339   unsigned Opc =
11340       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
11341   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
11342 }
11343 
/// Combine a LOAD node. The following are tried in order, and the first one
/// that applies ends the visit:
///   1. delete a non-volatile load whose loaded value (and, for indexed
///      loads, updated pointer) is unused;
///   2. forward the value of a non-truncating store that is the load's chain
///      and writes the same pointer with the same value type;
///   3. rebuild the load with a larger alignment inferred from the pointer;
///   4. move the load onto a better chain found by FindBetterChain;
///   5. form a pre- or post-indexed load;
///   6. slice the load up via SliceUpLoad.
///
/// \param N the load node; it is cast to LoadSDNode unconditionally.
/// \returns a null SDValue if nothing was changed, otherwise a non-null
///          value (SDValue(N, 0) or the result of CombineTo).
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    // A load whose second result is the chain (MVT::Other) is treated as
    // unindexed; otherwise the chain is expected to be the third result.
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        // N is only deleted if the replacement above really left it unused
        // (see the isomorphic-load caveat above).
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The load can go if its loaded value is unused and either the updated
      // pointer is unused too, or the pointer update can be split out into a
      // standalone add/sub.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          // The updated pointer is still needed: recreate it as plain
          // arithmetic on the base pointer.
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        // Results of an indexed load as used here: 0 = loaded value,
        // 1 = updated pointer, 2 = chain.
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (OptLevel != CodeGenOpt::None &&
      ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      // Same address and same value type: the load's value becomes the
      // store's operand 1 and the load's chain result becomes Chain.
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, PrevST->getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // Only replace N if the DAG handed back a different node.
        if (NewLoad.getNode() != N)
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
11479 
11480 namespace {
11481 /// \brief Helper structure used to slice a load in smaller loads.
11482 /// Basically a slice is obtained from the following sequence:
11483 /// Origin = load Ty1, Base
11484 /// Shift = srl Ty1 Origin, CstTy Amount
11485 /// Inst = trunc Shift to Ty2
11486 ///
11487 /// Then, it will be rewritten into:
11488 /// Slice = load SliceTy, Base + SliceOffset
11489 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
11490 ///
11491 /// SliceTy is deduced from the number of bits that are actually used to
11492 /// build Inst.
11493 struct LoadedSlice {
11494   /// \brief Helper structure used to compute the cost of a slice.
11495   struct Cost {
11496     /// Are we optimizing for code size.
11497     bool ForCodeSize;
11498     /// Various cost.
11499     unsigned Loads;
11500     unsigned Truncates;
11501     unsigned CrossRegisterBanksCopies;
11502     unsigned ZExts;
11503     unsigned Shift;
11504 
11505     Cost(bool ForCodeSize = false)
11506         : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
11507           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
11508 
11509     /// \brief Get the cost of one isolated slice.
11510     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
11511         : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
11512           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
11513       EVT TruncType = LS.Inst->getValueType(0);
11514       EVT LoadedType = LS.getLoadedType();
11515       if (TruncType != LoadedType &&
11516           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
11517         ZExts = 1;
11518     }
11519 
11520     /// \brief Account for slicing gain in the current cost.
11521     /// Slicing provide a few gains like removing a shift or a
11522     /// truncate. This method allows to grow the cost of the original
11523     /// load with the gain from this slice.
11524     void addSliceGain(const LoadedSlice &LS) {
11525       // Each slice saves a truncate.
11526       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
11527       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
11528                               LS.Inst->getValueType(0)))
11529         ++Truncates;
11530       // If there is a shift amount, this slice gets rid of it.
11531       if (LS.Shift)
11532         ++Shift;
11533       // If this slice can merge a cross register bank copy, account for it.
11534       if (LS.canMergeExpensiveCrossRegisterBankCopy())
11535         ++CrossRegisterBanksCopies;
11536     }
11537 
11538     Cost &operator+=(const Cost &RHS) {
11539       Loads += RHS.Loads;
11540       Truncates += RHS.Truncates;
11541       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
11542       ZExts += RHS.ZExts;
11543       Shift += RHS.Shift;
11544       return *this;
11545     }
11546 
11547     bool operator==(const Cost &RHS) const {
11548       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
11549              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
11550              ZExts == RHS.ZExts && Shift == RHS.Shift;
11551     }
11552 
11553     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
11554 
11555     bool operator<(const Cost &RHS) const {
11556       // Assume cross register banks copies are as expensive as loads.
11557       // FIXME: Do we want some more target hooks?
11558       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
11559       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
11560       // Unless we are optimizing for code size, consider the
11561       // expensive operation first.
11562       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
11563         return ExpensiveOpsLHS < ExpensiveOpsRHS;
11564       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
11565              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
11566     }
11567 
11568     bool operator>(const Cost &RHS) const { return RHS < *this; }
11569 
11570     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
11571 
11572     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
11573   };
11574   // The last instruction that represent the slice. This should be a
11575   // truncate instruction.
11576   SDNode *Inst;
11577   // The original load instruction.
11578   LoadSDNode *Origin;
11579   // The right shift amount in bits from the original load.
11580   unsigned Shift;
11581   // The DAG from which Origin came from.
11582   // This is used to get some contextual information about legal types, etc.
11583   SelectionDAG *DAG;
11584 
11585   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
11586               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
11587       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
11588 
11589   /// \brief Get the bits used in a chunk of bits \p BitWidth large.
11590   /// \return Result is \p BitWidth and has used bits set to 1 and
11591   ///         not used bits set to 0.
11592   APInt getUsedBits() const {
11593     // Reproduce the trunc(lshr) sequence:
11594     // - Start from the truncated value.
11595     // - Zero extend to the desired bit width.
11596     // - Shift left.
11597     assert(Origin && "No original load to compare against.");
11598     unsigned BitWidth = Origin->getValueSizeInBits(0);
11599     assert(Inst && "This slice is not bound to an instruction");
11600     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
11601            "Extracted slice is bigger than the whole type!");
11602     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
11603     UsedBits.setAllBits();
11604     UsedBits = UsedBits.zext(BitWidth);
11605     UsedBits <<= Shift;
11606     return UsedBits;
11607   }
11608 
11609   /// \brief Get the size of the slice to be loaded in bytes.
11610   unsigned getLoadedSize() const {
11611     unsigned SliceSize = getUsedBits().countPopulation();
11612     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
11613     return SliceSize / 8;
11614   }
11615 
11616   /// \brief Get the type that will be loaded for this slice.
11617   /// Note: This may not be the final type for the slice.
11618   EVT getLoadedType() const {
11619     assert(DAG && "Missing context");
11620     LLVMContext &Ctxt = *DAG->getContext();
11621     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
11622   }
11623 
11624   /// \brief Get the alignment of the load used for this slice.
11625   unsigned getAlignment() const {
11626     unsigned Alignment = Origin->getAlignment();
11627     unsigned Offset = getOffsetFromBase();
11628     if (Offset != 0)
11629       Alignment = MinAlign(Alignment, Alignment + Offset);
11630     return Alignment;
11631   }
11632 
11633   /// \brief Check if this slice can be rewritten with legal operations.
11634   bool isLegal() const {
11635     // An invalid slice is not legal.
11636     if (!Origin || !Inst || !DAG)
11637       return false;
11638 
11639     // Offsets are for indexed load only, we do not handle that.
11640     if (!Origin->getOffset().isUndef())
11641       return false;
11642 
11643     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11644 
11645     // Check that the type is legal.
11646     EVT SliceType = getLoadedType();
11647     if (!TLI.isTypeLegal(SliceType))
11648       return false;
11649 
11650     // Check that the load is legal for this type.
11651     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
11652       return false;
11653 
11654     // Check that the offset can be computed.
11655     // 1. Check its type.
11656     EVT PtrType = Origin->getBasePtr().getValueType();
11657     if (PtrType == MVT::Untyped || PtrType.isExtended())
11658       return false;
11659 
11660     // 2. Check that it fits in the immediate.
11661     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
11662       return false;
11663 
11664     // 3. Check that the computation is legal.
11665     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
11666       return false;
11667 
11668     // Check that the zext is legal if it needs one.
11669     EVT TruncateType = Inst->getValueType(0);
11670     if (TruncateType != SliceType &&
11671         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
11672       return false;
11673 
11674     return true;
11675   }
11676 
11677   /// \brief Get the offset in bytes of this slice in the original chunk of
11678   /// bits.
11679   /// \pre DAG != nullptr.
11680   uint64_t getOffsetFromBase() const {
11681     assert(DAG && "Missing context.");
11682     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
11683     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
11684     uint64_t Offset = Shift / 8;
11685     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
11686     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
11687            "The size of the original loaded type is not a multiple of a"
11688            " byte.");
11689     // If Offset is bigger than TySizeInBytes, it means we are loading all
11690     // zeros. This should have been optimized before in the process.
11691     assert(TySizeInBytes > Offset &&
11692            "Invalid shift amount for given loaded size");
11693     if (IsBigEndian)
11694       Offset = TySizeInBytes - Offset - getLoadedSize();
11695     return Offset;
11696   }
11697 
11698   /// \brief Generate the sequence of instructions to load the slice
11699   /// represented by this object and redirect the uses of this slice to
11700   /// this new sequence of instructions.
11701   /// \pre this->Inst && this->Origin are valid Instructions and this
11702   /// object passed the legal check: LoadedSlice::isLegal returned true.
11703   /// \return The last instruction of the sequence used to load the slice.
11704   SDValue loadSlice() const {
11705     assert(Inst && Origin && "Unable to replace a non-existing slice.");
11706     const SDValue &OldBaseAddr = Origin->getBasePtr();
11707     SDValue BaseAddr = OldBaseAddr;
11708     // Get the offset in that chunk of bytes w.r.t. the endianness.
11709     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
11710     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
11711     if (Offset) {
11712       // BaseAddr = BaseAddr + Offset.
11713       EVT ArithType = BaseAddr.getValueType();
11714       SDLoc DL(Origin);
11715       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
11716                               DAG->getConstant(Offset, DL, ArithType));
11717     }
11718 
11719     // Create the type of the loaded slice according to its size.
11720     EVT SliceType = getLoadedType();
11721 
11722     // Create the load for the slice.
11723     SDValue LastInst =
11724         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
11725                      Origin->getPointerInfo().getWithOffset(Offset),
11726                      getAlignment(), Origin->getMemOperand()->getFlags());
11727     // If the final type is not the same as the loaded type, this means that
11728     // we have to pad with zero. Create a zero extend for that.
11729     EVT FinalType = Inst->getValueType(0);
11730     if (SliceType != FinalType)
11731       LastInst =
11732           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
11733     return LastInst;
11734   }
11735 
11736   /// \brief Check if this slice can be merged with an expensive cross register
11737   /// bank copy. E.g.,
11738   /// i = load i32
11739   /// f = bitcast i32 i to float
11740   bool canMergeExpensiveCrossRegisterBankCopy() const {
11741     if (!Inst || !Inst->hasOneUse())
11742       return false;
11743     SDNode *Use = *Inst->use_begin();
11744     if (Use->getOpcode() != ISD::BITCAST)
11745       return false;
11746     assert(DAG && "Missing context");
11747     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11748     EVT ResVT = Use->getValueType(0);
11749     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
11750     const TargetRegisterClass *ArgRC =
11751         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
11752     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
11753       return false;
11754 
11755     // At this point, we know that we perform a cross-register-bank copy.
11756     // Check if it is expensive.
11757     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
11758     // Assume bitcasts are cheap, unless both register classes do not
11759     // explicitly share a common sub class.
11760     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
11761       return false;
11762 
11763     // Check if it will be merged with the load.
11764     // 1. Check the alignment constraint.
11765     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
11766         ResVT.getTypeForEVT(*DAG->getContext()));
11767 
11768     if (RequiredAlignment > getAlignment())
11769       return false;
11770 
11771     // 2. Check that the load is a legal operation for that type.
11772     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
11773       return false;
11774 
11775     // 3. Check that we do not have a zext in the way.
11776     if (Inst->getValueType(0) != getLoadedType())
11777       return false;
11778 
11779     return true;
11780   }
11781 };
11782 }
11783 
11784 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
11785 /// \p UsedBits looks like 0..0 1..1 0..0.
11786 static bool areUsedBitsDense(const APInt &UsedBits) {
11787   // If all the bits are one, this is dense!
11788   if (UsedBits.isAllOnesValue())
11789     return true;
11790 
11791   // Get rid of the unused bits on the right.
11792   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
11793   // Get rid of the unused bits on the left.
11794   if (NarrowedUsedBits.countLeadingZeros())
11795     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
11796   // Check that the chunk of bits is completely used.
11797   return NarrowedUsedBits.isAllOnesValue();
11798 }
11799 
11800 /// \brief Check whether or not \p First and \p Second are next to each other
11801 /// in memory. This means that there is no hole between the bits loaded
11802 /// by \p First and the bits loaded by \p Second.
11803 static bool areSlicesNextToEachOther(const LoadedSlice &First,
11804                                      const LoadedSlice &Second) {
11805   assert(First.Origin == Second.Origin && First.Origin &&
11806          "Unable to match different memory origins.");
11807   APInt UsedBits = First.getUsedBits();
11808   assert((UsedBits & Second.getUsedBits()) == 0 &&
11809          "Slices are not supposed to overlap.");
11810   UsedBits |= Second.getUsedBits();
11811   return areUsedBitsDense(UsedBits);
11812 }
11813 
11814 /// \brief Adjust the \p GlobalLSCost according to the target
11815 /// paring capabilities and the layout of the slices.
11816 /// \pre \p GlobalLSCost should account for at least as many loads as
11817 /// there is in the slices in \p LoadedSlices.
11818 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11819                                  LoadedSlice::Cost &GlobalLSCost) {
11820   unsigned NumberOfSlices = LoadedSlices.size();
11821   // If there is less than 2 elements, no pairing is possible.
11822   if (NumberOfSlices < 2)
11823     return;
11824 
11825   // Sort the slices so that elements that are likely to be next to each
11826   // other in memory are next to each other in the list.
11827   std::sort(LoadedSlices.begin(), LoadedSlices.end(),
11828             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
11829     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
11830     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
11831   });
11832   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
11833   // First (resp. Second) is the first (resp. Second) potentially candidate
11834   // to be placed in a paired load.
11835   const LoadedSlice *First = nullptr;
11836   const LoadedSlice *Second = nullptr;
11837   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
11838                 // Set the beginning of the pair.
11839                                                            First = Second) {
11840 
11841     Second = &LoadedSlices[CurrSlice];
11842 
11843     // If First is NULL, it means we start a new pair.
11844     // Get to the next slice.
11845     if (!First)
11846       continue;
11847 
11848     EVT LoadedType = First->getLoadedType();
11849 
11850     // If the types of the slices are different, we cannot pair them.
11851     if (LoadedType != Second->getLoadedType())
11852       continue;
11853 
11854     // Check if the target supplies paired loads for this type.
11855     unsigned RequiredAlignment = 0;
11856     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
11857       // move to the next pair, this type is hopeless.
11858       Second = nullptr;
11859       continue;
11860     }
11861     // Check if we meet the alignment requirement.
11862     if (RequiredAlignment > First->getAlignment())
11863       continue;
11864 
11865     // Check that both loads are next to each other in memory.
11866     if (!areSlicesNextToEachOther(*First, *Second))
11867       continue;
11868 
11869     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
11870     --GlobalLSCost.Loads;
11871     // Move to the next pair.
11872     Second = nullptr;
11873   }
11874 }
11875 
11876 /// \brief Check the profitability of all involved LoadedSlice.
11877 /// Currently, it is considered profitable if there is exactly two
11878 /// involved slices (1) which are (2) next to each other in memory, and
11879 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
11880 ///
11881 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
11882 /// the elements themselves.
11883 ///
11884 /// FIXME: When the cost model will be mature enough, we can relax
11885 /// constraints (1) and (2).
11886 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11887                                 const APInt &UsedBits, bool ForCodeSize) {
11888   unsigned NumberOfSlices = LoadedSlices.size();
11889   if (StressLoadSlicing)
11890     return NumberOfSlices > 1;
11891 
11892   // Check (1).
11893   if (NumberOfSlices != 2)
11894     return false;
11895 
11896   // Check (2).
11897   if (!areUsedBitsDense(UsedBits))
11898     return false;
11899 
11900   // Check (3).
11901   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
11902   // The original code has one big load.
11903   OrigCost.Loads = 1;
11904   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
11905     const LoadedSlice &LS = LoadedSlices[CurrSlice];
11906     // Accumulate the cost of all the slices.
11907     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
11908     GlobalSlicingCost += SliceCost;
11909 
11910     // Account as cost in the original configuration the gain obtained
11911     // with the current slices.
11912     OrigCost.addSliceGain(LS);
11913   }
11914 
11915   // If the target supports paired load, adjust the cost accordingly.
11916   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
11917   return OrigCost > GlobalSlicingCost;
11918 }
11919 
11920 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
11921 /// operations, split it in the various pieces being extracted.
11922 ///
11923 /// This sort of thing is introduced by SROA.
11924 /// This slicing takes care not to insert overlapping loads.
11925 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
11926 bool DAGCombiner::SliceUpLoad(SDNode *N) {
11927   if (Level < AfterLegalizeDAG)
11928     return false;
11929 
11930   LoadSDNode *LD = cast<LoadSDNode>(N);
11931   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
11932       !LD->getValueType(0).isInteger())
11933     return false;
11934 
11935   // Keep track of already used bits to detect overlapping values.
11936   // In that case, we will just abort the transformation.
11937   APInt UsedBits(LD->getValueSizeInBits(0), 0);
11938 
11939   SmallVector<LoadedSlice, 4> LoadedSlices;
11940 
11941   // Check if this load is used as several smaller chunks of bits.
11942   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
11943   // of computation for each trunc.
11944   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
11945        UI != UIEnd; ++UI) {
11946     // Skip the uses of the chain.
11947     if (UI.getUse().getResNo() != 0)
11948       continue;
11949 
11950     SDNode *User = *UI;
11951     unsigned Shift = 0;
11952 
11953     // Check if this is a trunc(lshr).
11954     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
11955         isa<ConstantSDNode>(User->getOperand(1))) {
11956       Shift = User->getConstantOperandVal(1);
11957       User = *User->use_begin();
11958     }
11959 
11960     // At this point, User is a Truncate, iff we encountered, trunc or
11961     // trunc(lshr).
11962     if (User->getOpcode() != ISD::TRUNCATE)
11963       return false;
11964 
11965     // The width of the type must be a power of 2 and greater than 8-bits.
11966     // Otherwise the load cannot be represented in LLVM IR.
11967     // Moreover, if we shifted with a non-8-bits multiple, the slice
11968     // will be across several bytes. We do not support that.
11969     unsigned Width = User->getValueSizeInBits(0);
11970     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
11971       return 0;
11972 
11973     // Build the slice for this chain of computations.
11974     LoadedSlice LS(User, LD, Shift, &DAG);
11975     APInt CurrentUsedBits = LS.getUsedBits();
11976 
11977     // Check if this slice overlaps with another.
11978     if ((CurrentUsedBits & UsedBits) != 0)
11979       return false;
11980     // Update the bits used globally.
11981     UsedBits |= CurrentUsedBits;
11982 
11983     // Check if the new slice would be legal.
11984     if (!LS.isLegal())
11985       return false;
11986 
11987     // Record the slice.
11988     LoadedSlices.push_back(LS);
11989   }
11990 
11991   // Abort slicing if it does not seem to be profitable.
11992   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
11993     return false;
11994 
11995   ++SlicedLoads;
11996 
11997   // Rewrite each chain to use an independent load.
11998   // By construction, each chain can be represented by a unique load.
11999 
12000   // Prepare the argument for the new token factor for all the slices.
12001   SmallVector<SDValue, 8> ArgChains;
12002   for (SmallVectorImpl<LoadedSlice>::const_iterator
12003            LSIt = LoadedSlices.begin(),
12004            LSItEnd = LoadedSlices.end();
12005        LSIt != LSItEnd; ++LSIt) {
12006     SDValue SliceInst = LSIt->loadSlice();
12007     CombineTo(LSIt->Inst, SliceInst, true);
12008     if (SliceInst.getOpcode() != ISD::LOAD)
12009       SliceInst = SliceInst.getOperand(0);
12010     assert(SliceInst->getOpcode() == ISD::LOAD &&
12011            "It takes more than a zext to get to the loaded slice!!");
12012     ArgChains.push_back(SliceInst.getValue(1));
12013   }
12014 
12015   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
12016                               ArgChains);
12017   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
12018   AddToWorklist(Chain.getNode());
12019   return true;
12020 }
12021 
12022 /// Check to see if V is (and load (ptr), imm), where the load is having
12023 /// specific bytes cleared out.  If so, return the byte size being masked out
12024 /// and the shift amount.
12025 static std::pair<unsigned, unsigned>
12026 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
12027   std::pair<unsigned, unsigned> Result(0, 0);
12028 
12029   // Check for the structure we're looking for.
12030   if (V->getOpcode() != ISD::AND ||
12031       !isa<ConstantSDNode>(V->getOperand(1)) ||
12032       !ISD::isNormalLoad(V->getOperand(0).getNode()))
12033     return Result;
12034 
12035   // Check the chain and pointer.
12036   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
12037   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
12038 
12039   // The store should be chained directly to the load or be an operand of a
12040   // tokenfactor.
12041   if (LD == Chain.getNode())
12042     ; // ok.
12043   else if (Chain->getOpcode() != ISD::TokenFactor)
12044     return Result; // Fail.
12045   else {
12046     bool isOk = false;
12047     for (const SDValue &ChainOp : Chain->op_values())
12048       if (ChainOp.getNode() == LD) {
12049         isOk = true;
12050         break;
12051       }
12052     if (!isOk) return Result;
12053   }
12054 
12055   // This only handles simple types.
12056   if (V.getValueType() != MVT::i16 &&
12057       V.getValueType() != MVT::i32 &&
12058       V.getValueType() != MVT::i64)
12059     return Result;
12060 
12061   // Check the constant mask.  Invert it so that the bits being masked out are
12062   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
12063   // follow the sign bit for uniformity.
12064   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
12065   unsigned NotMaskLZ = countLeadingZeros(NotMask);
12066   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
12067   unsigned NotMaskTZ = countTrailingZeros(NotMask);
12068   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
12069   if (NotMaskLZ == 64) return Result;  // All zero mask.
12070 
12071   // See if we have a continuous run of bits.  If so, we have 0*1+0*
12072   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
12073     return Result;
12074 
12075   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
12076   if (V.getValueType() != MVT::i64 && NotMaskLZ)
12077     NotMaskLZ -= 64-V.getValueSizeInBits();
12078 
12079   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
12080   switch (MaskedBytes) {
12081   case 1:
12082   case 2:
12083   case 4: break;
12084   default: return Result; // All one mask, or 5-byte mask.
12085   }
12086 
12087   // Verify that the first bit starts at a multiple of mask so that the access
12088   // is aligned the same as the access width.
12089   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
12090 
12091   Result.first = MaskedBytes;
12092   Result.second = NotMaskTZ/8;
12093   return Result;
12094 }
12095 
12096 
12097 /// Check to see if IVal is something that provides a value as specified by
12098 /// MaskInfo. If so, replace the specified store with a narrower store of
12099 /// truncated IVal.
12100 static SDNode *
12101 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
12102                                 SDValue IVal, StoreSDNode *St,
12103                                 DAGCombiner *DC) {
12104   unsigned NumBytes = MaskInfo.first;
12105   unsigned ByteShift = MaskInfo.second;
12106   SelectionDAG &DAG = DC->getDAG();
12107 
12108   // Check to see if IVal is all zeros in the part being masked in by the 'or'
12109   // that uses this.  If not, this is not a replacement.
12110   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
12111                                   ByteShift*8, (ByteShift+NumBytes)*8);
12112   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
12113 
12114   // Check that it is legal on the target to do this.  It is legal if the new
12115   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
12116   // legalization.
12117   MVT VT = MVT::getIntegerVT(NumBytes*8);
12118   if (!DC->isTypeLegal(VT))
12119     return nullptr;
12120 
12121   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
12122   // shifted by ByteShift and truncated down to NumBytes.
12123   if (ByteShift) {
12124     SDLoc DL(IVal);
12125     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
12126                        DAG.getConstant(ByteShift*8, DL,
12127                                     DC->getShiftAmountTy(IVal.getValueType())));
12128   }
12129 
12130   // Figure out the offset for the store and the alignment of the access.
12131   unsigned StOffset;
12132   unsigned NewAlign = St->getAlignment();
12133 
12134   if (DAG.getDataLayout().isLittleEndian())
12135     StOffset = ByteShift;
12136   else
12137     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
12138 
12139   SDValue Ptr = St->getBasePtr();
12140   if (StOffset) {
12141     SDLoc DL(IVal);
12142     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
12143                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
12144     NewAlign = MinAlign(NewAlign, StOffset);
12145   }
12146 
12147   // Truncate down to the new size.
12148   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
12149 
12150   ++OpsNarrowed;
12151   return DAG
12152       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
12153                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
12154       .getNode();
12155 }
12156 
12157 
12158 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
12159 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
12160 /// narrowing the load and store if it would end up being a win for performance
12161 /// or code size.
12162 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
12163   StoreSDNode *ST  = cast<StoreSDNode>(N);
12164   if (ST->isVolatile())
12165     return SDValue();
12166 
12167   SDValue Chain = ST->getChain();
12168   SDValue Value = ST->getValue();
12169   SDValue Ptr   = ST->getBasePtr();
12170   EVT VT = Value.getValueType();
12171 
12172   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
12173     return SDValue();
12174 
12175   unsigned Opc = Value.getOpcode();
12176 
12177   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
12178   // is a byte mask indicating a consecutive number of bytes, check to see if
12179   // Y is known to provide just those bytes.  If so, we try to replace the
12180   // load + replace + store sequence with a single (narrower) store, which makes
12181   // the load dead.
12182   if (Opc == ISD::OR) {
12183     std::pair<unsigned, unsigned> MaskedLoad;
12184     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
12185     if (MaskedLoad.first)
12186       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
12187                                                   Value.getOperand(1), ST,this))
12188         return SDValue(NewST, 0);
12189 
12190     // Or is commutative, so try swapping X and Y.
12191     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
12192     if (MaskedLoad.first)
12193       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
12194                                                   Value.getOperand(0), ST,this))
12195         return SDValue(NewST, 0);
12196   }
12197 
12198   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
12199       Value.getOperand(1).getOpcode() != ISD::Constant)
12200     return SDValue();
12201 
12202   SDValue N0 = Value.getOperand(0);
12203   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12204       Chain == SDValue(N0.getNode(), 1)) {
12205     LoadSDNode *LD = cast<LoadSDNode>(N0);
12206     if (LD->getBasePtr() != Ptr ||
12207         LD->getPointerInfo().getAddrSpace() !=
12208         ST->getPointerInfo().getAddrSpace())
12209       return SDValue();
12210 
12211     // Find the type to narrow it the load / op / store to.
12212     SDValue N1 = Value.getOperand(1);
12213     unsigned BitWidth = N1.getValueSizeInBits();
12214     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
12215     if (Opc == ISD::AND)
12216       Imm ^= APInt::getAllOnesValue(BitWidth);
12217     if (Imm == 0 || Imm.isAllOnesValue())
12218       return SDValue();
12219     unsigned ShAmt = Imm.countTrailingZeros();
12220     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
12221     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
12222     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
12223     // The narrowing should be profitable, the load/store operation should be
12224     // legal (or custom) and the store size should be equal to the NewVT width.
12225     while (NewBW < BitWidth &&
12226            (NewVT.getStoreSizeInBits() != NewBW ||
12227             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
12228             !TLI.isNarrowingProfitable(VT, NewVT))) {
12229       NewBW = NextPowerOf2(NewBW);
12230       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
12231     }
12232     if (NewBW >= BitWidth)
12233       return SDValue();
12234 
12235     // If the lsb changed does not start at the type bitwidth boundary,
12236     // start at the previous one.
12237     if (ShAmt % NewBW)
12238       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
12239     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
12240                                    std::min(BitWidth, ShAmt + NewBW));
12241     if ((Imm & Mask) == Imm) {
12242       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
12243       if (Opc == ISD::AND)
12244         NewImm ^= APInt::getAllOnesValue(NewBW);
12245       uint64_t PtrOff = ShAmt / 8;
12246       // For big endian targets, we need to adjust the offset to the pointer to
12247       // load the correct bytes.
12248       if (DAG.getDataLayout().isBigEndian())
12249         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
12250 
12251       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
12252       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
12253       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
12254         return SDValue();
12255 
12256       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
12257                                    Ptr.getValueType(), Ptr,
12258                                    DAG.getConstant(PtrOff, SDLoc(LD),
12259                                                    Ptr.getValueType()));
12260       SDValue NewLD =
12261           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
12262                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12263                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
12264       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
12265                                    DAG.getConstant(NewImm, SDLoc(Value),
12266                                                    NewVT));
12267       SDValue NewST =
12268           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
12269                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
12270 
12271       AddToWorklist(NewPtr.getNode());
12272       AddToWorklist(NewLD.getNode());
12273       AddToWorklist(NewVal.getNode());
12274       WorklistRemover DeadNodes(*this);
12275       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
12276       ++OpsNarrowed;
12277       return NewST;
12278     }
12279   }
12280 
12281   return SDValue();
12282 }
12283 
12284 /// For a given floating point load / store pair, if the load value isn't used
12285 /// by any other operations, then consider transforming the pair to integer
12286 /// load / store operations if the target deems the transformation profitable.
12287 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
12288   StoreSDNode *ST  = cast<StoreSDNode>(N);
12289   SDValue Chain = ST->getChain();
12290   SDValue Value = ST->getValue();
12291   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
12292       Value.hasOneUse() &&
12293       Chain == SDValue(Value.getNode(), 1)) {
12294     LoadSDNode *LD = cast<LoadSDNode>(Value);
12295     EVT VT = LD->getMemoryVT();
12296     if (!VT.isFloatingPoint() ||
12297         VT != ST->getMemoryVT() ||
12298         LD->isNonTemporal() ||
12299         ST->isNonTemporal() ||
12300         LD->getPointerInfo().getAddrSpace() != 0 ||
12301         ST->getPointerInfo().getAddrSpace() != 0)
12302       return SDValue();
12303 
12304     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
12305     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
12306         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
12307         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
12308         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
12309       return SDValue();
12310 
12311     unsigned LDAlign = LD->getAlignment();
12312     unsigned STAlign = ST->getAlignment();
12313     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
12314     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
12315     if (LDAlign < ABIAlign || STAlign < ABIAlign)
12316       return SDValue();
12317 
12318     SDValue NewLD =
12319         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
12320                     LD->getPointerInfo(), LDAlign);
12321 
12322     SDValue NewST =
12323         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
12324                      ST->getPointerInfo(), STAlign);
12325 
12326     AddToWorklist(NewLD.getNode());
12327     AddToWorklist(NewST.getNode());
12328     WorklistRemover DeadNodes(*this);
12329     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
12330     ++LdStFP2Int;
12331     return NewST;
12332   }
12333 
12334   return SDValue();
12335 }
12336 
12337 // This is a helper function for visitMUL to check the profitability
12338 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
12339 // MulNode is the original multiply, AddNode is (add x, c1),
12340 // and ConstNode is c2.
12341 //
12342 // If the (add x, c1) has multiple uses, we could increase
12343 // the number of adds if we make this transformation.
12344 // It would only be worth doing this if we can remove a
12345 // multiply in the process. Check for that here.
12346 // To illustrate:
12347 //     (A + c1) * c3
12348 //     (A + c2) * c3
12349 // We're checking for cases where we have common "c3 * A" expressions.
12350 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
12351                                               SDValue &AddNode,
12352                                               SDValue &ConstNode) {
12353   APInt Val;
12354 
12355   // If the add only has one use, this would be OK to do.
12356   if (AddNode.getNode()->hasOneUse())
12357     return true;
12358 
12359   // Walk all the users of the constant with which we're multiplying.
12360   for (SDNode *Use : ConstNode->uses()) {
12361 
12362     if (Use == MulNode) // This use is the one we're on right now. Skip it.
12363       continue;
12364 
12365     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
12366       SDNode *OtherOp;
12367       SDNode *MulVar = AddNode.getOperand(0).getNode();
12368 
12369       // OtherOp is what we're multiplying against the constant.
12370       if (Use->getOperand(0) == ConstNode)
12371         OtherOp = Use->getOperand(1).getNode();
12372       else
12373         OtherOp = Use->getOperand(0).getNode();
12374 
12375       // Check to see if multiply is with the same operand of our "add".
12376       //
12377       //     ConstNode  = CONST
12378       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
12379       //     ...
12380       //     AddNode  = (A + c1)  <-- MulVar is A.
12381       //         = AddNode * ConstNode   <-- current visiting instruction.
12382       //
12383       // If we make this transformation, we will have a common
12384       // multiply (ConstNode * A) that we can save.
12385       if (OtherOp == MulVar)
12386         return true;
12387 
12388       // Now check to see if a future expansion will give us a common
12389       // multiply.
12390       //
12391       //     ConstNode  = CONST
12392       //     AddNode    = (A + c1)
12393       //     ...   = AddNode * ConstNode <-- current visiting instruction.
12394       //     ...
12395       //     OtherOp = (A + c2)
12396       //     Use     = OtherOp * ConstNode <-- visiting Use.
12397       //
12398       // If we make this transformation, we will have a common
12399       // multiply (CONST * A) after we also do the same transformation
12400       // to the "t2" instruction.
12401       if (OtherOp->getOpcode() == ISD::ADD &&
12402           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
12403           OtherOp->getOperand(0).getNode() == MulVar)
12404         return true;
12405     }
12406   }
12407 
12408   // Didn't find a case where this would be profitable.
12409   return false;
12410 }
12411 
12412 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
12413                                          unsigned NumStores) {
12414   SmallVector<SDValue, 8> Chains;
12415   SmallPtrSet<const SDNode *, 8> Visited;
12416   SDLoc StoreDL(StoreNodes[0].MemNode);
12417 
12418   for (unsigned i = 0; i < NumStores; ++i) {
12419     Visited.insert(StoreNodes[i].MemNode);
12420   }
12421 
12422   // don't include nodes that are children
12423   for (unsigned i = 0; i < NumStores; ++i) {
12424     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
12425       Chains.push_back(StoreNodes[i].MemNode->getChain());
12426   }
12427 
12428   assert(Chains.size() > 0 && "Chain should have generated a chain");
12429   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
12430 }
12431 
12432 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
12433     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
12434     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
12435   // Make sure we have something to merge.
12436   if (NumStores < 2)
12437     return false;
12438 
12439   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12440 
12441   // The latest Node in the DAG.
12442   SDLoc DL(StoreNodes[0].MemNode);
12443 
12444   SDValue StoredVal;
12445   if (UseVector) {
12446     bool IsVec = MemVT.isVector();
12447     unsigned Elts = NumStores;
12448     if (IsVec) {
12449       // When merging vector stores, get the total number of elements.
12450       Elts *= MemVT.getVectorNumElements();
12451     }
12452     // Get the type for the merged vector store.
12453     EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12454     assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
12455 
12456     if (IsConstantSrc) {
12457       SmallVector<SDValue, 8> BuildVector;
12458       for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
12459         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
12460         SDValue Val = St->getValue();
12461         if (MemVT.getScalarType().isInteger())
12462           if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
12463             Val = DAG.getConstant(
12464                 (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
12465                 SDLoc(CFP), MemVT);
12466         BuildVector.push_back(Val);
12467       }
12468       StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
12469     } else {
12470       SmallVector<SDValue, 8> Ops;
12471       for (unsigned i = 0; i < NumStores; ++i) {
12472         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12473         SDValue Val = St->getValue();
12474         // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
12475         if (Val.getValueType() != MemVT)
12476           return false;
12477         Ops.push_back(Val);
12478       }
12479 
12480       // Build the extracted vector elements back into a vector.
12481       StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
12482                               DL, Ty, Ops);    }
12483   } else {
12484     // We should always use a vector store when merging extracted vector
12485     // elements, so this path implies a store of constants.
12486     assert(IsConstantSrc && "Merged vector elements should use vector store");
12487 
12488     unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
12489     APInt StoreInt(SizeInBits, 0);
12490 
12491     // Construct a single integer constant which is made of the smaller
12492     // constant inputs.
12493     bool IsLE = DAG.getDataLayout().isLittleEndian();
12494     for (unsigned i = 0; i < NumStores; ++i) {
12495       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
12496       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
12497 
12498       SDValue Val = St->getValue();
12499       StoreInt <<= ElementSizeBytes * 8;
12500       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
12501         StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits);
12502       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
12503         StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
12504       } else {
12505         llvm_unreachable("Invalid constant element type");
12506       }
12507     }
12508 
12509     // Create the new Load and Store operations.
12510     EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
12511     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
12512   }
12513 
12514   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12515   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
12516 
12517   // make sure we use trunc store if it's necessary to be legal.
12518   SDValue NewStore;
12519   if (UseVector || !UseTrunc) {
12520     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
12521                             FirstInChain->getPointerInfo(),
12522                             FirstInChain->getAlignment());
12523   } else { // Must be realized as a trunc store
12524     EVT LegalizedStoredValueTy =
12525         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
12526     unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
12527     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
12528     SDValue ExtendedStoreVal =
12529         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
12530                         LegalizedStoredValueTy);
12531     NewStore = DAG.getTruncStore(
12532         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
12533         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
12534         FirstInChain->getAlignment(),
12535         FirstInChain->getMemOperand()->getFlags());
12536   }
12537 
12538   // Replace all merged stores with the new store.
12539   for (unsigned i = 0; i < NumStores; ++i)
12540     CombineTo(StoreNodes[i].MemNode, NewStore);
12541 
12542   AddToWorklist(NewChain.getNode());
12543   return true;
12544 }
12545 
12546 void DAGCombiner::getStoreMergeCandidates(
12547     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
12548   // This holds the base pointer, index, and the offset in bytes from the base
12549   // pointer.
12550   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
12551   EVT MemVT = St->getMemoryVT();
12552 
12553   // We must have a base and an offset.
12554   if (!BasePtr.getBase().getNode())
12555     return;
12556 
12557   // Do not handle stores to undef base pointers.
12558   if (BasePtr.getBase().isUndef())
12559     return;
12560 
12561   bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
12562                        isa<ConstantFPSDNode>(St->getValue());
12563   bool IsExtractVecSrc =
12564       (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12565        St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
12566   bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
12567   BaseIndexOffset LBasePtr;
12568   // Match on loadbaseptr if relevant.
12569   if (IsLoadSrc)
12570     LBasePtr = BaseIndexOffset::match(
12571         cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
12572 
12573   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
12574                             int64_t &Offset) -> bool {
12575     if (Other->isVolatile() || Other->isIndexed())
12576       return false;
12577     // We can merge constant floats to equivalent integers
12578     if (Other->getMemoryVT() != MemVT)
12579       if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
12580             isa<ConstantFPSDNode>(Other->getValue())))
12581         return false;
12582     if (IsLoadSrc) {
12583       // The Load's Base Ptr must also match
12584       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
12585         auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
12586         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
12587           return false;
12588       } else
12589         return false;
12590     }
12591     if (IsConstantSrc)
12592       if (!(isa<ConstantSDNode>(Other->getValue()) ||
12593             isa<ConstantFPSDNode>(Other->getValue())))
12594         return false;
12595     if (IsExtractVecSrc)
12596       if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12597             Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
12598         return false;
12599     Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
12600     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
12601   };
12602   // We looking for a root node which is an ancestor to all mergable
12603   // stores. We search up through a load, to our root and then down
12604   // through all children. For instance we will find Store{1,2,3} if
12605   // St is Store1, Store2. or Store3 where the root is not a load
12606   // which always true for nonvolatile ops. TODO: Expand
12607   // the search to find all valid candidates through multiple layers of loads.
12608   //
12609   // Root
12610   // |-------|-------|
12611   // Load    Load    Store3
12612   // |       |
12613   // Store1   Store2
12614   //
12615   // FIXME: We should be able to climb and
12616   // descend TokenFactors to find candidates as well.
12617 
12618   SDNode *RootNode = (St->getChain()).getNode();
12619 
12620   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
12621     RootNode = Ldn->getChain().getNode();
12622     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
12623       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
12624         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
12625           if (I2.getOperandNo() == 0)
12626             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
12627               BaseIndexOffset Ptr;
12628               int64_t PtrDiff;
12629               if (CandidateMatch(OtherST, Ptr, PtrDiff))
12630                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
12631             }
12632   } else
12633     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
12634       if (I.getOperandNo() == 0)
12635         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
12636           BaseIndexOffset Ptr;
12637           int64_t PtrDiff;
12638           if (CandidateMatch(OtherST, Ptr, PtrDiff))
12639             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
12640         }
12641 }
12642 
12643 // We need to check that merging these stores does not cause a loop
12644 // in the DAG. Any store candidate may depend on another candidate
12645 // indirectly through its operand (we already consider dependencies
12646 // through the chain). Check in parallel by searching up from
12647 // non-chain operands of candidates.
12648 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
12649     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
12650   SmallPtrSet<const SDNode *, 16> Visited;
12651   SmallVector<const SDNode *, 8> Worklist;
12652   // search ops of store candidates
12653   for (unsigned i = 0; i < NumStores; ++i) {
12654     SDNode *n = StoreNodes[i].MemNode;
12655     // Potential loops may happen only through non-chain operands
12656     for (unsigned j = 1; j < n->getNumOperands(); ++j)
12657       Worklist.push_back(n->getOperand(j).getNode());
12658   }
12659   // search through DAG. We can stop early if we find a storenode
12660   for (unsigned i = 0; i < NumStores; ++i) {
12661     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
12662       return false;
12663   }
12664   return true;
12665 }
12666 
12667 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
12668   if (OptLevel == CodeGenOpt::None)
12669     return false;
12670 
12671   EVT MemVT = St->getMemoryVT();
12672   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12673 
12674   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
12675     return false;
12676 
12677   bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
12678       Attribute::NoImplicitFloat);
12679 
12680   // This function cannot currently deal with non-byte-sized memory sizes.
12681   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
12682     return false;
12683 
12684   if (!MemVT.isSimple())
12685     return false;
12686 
12687   // Perform an early exit check. Do not bother looking at stored values that
12688   // are not constants, loads, or extracted vector elements.
12689   SDValue StoredVal = St->getValue();
12690   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
12691   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
12692                        isa<ConstantFPSDNode>(StoredVal);
12693   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12694                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
12695 
12696   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
12697     return false;
12698 
12699   // Don't merge vectors into wider vectors if the source data comes from loads.
12700   // TODO: This restriction can be lifted by using logic similar to the
12701   // ExtractVecSrc case.
12702   if (MemVT.isVector() && IsLoadSrc)
12703     return false;
12704 
12705   SmallVector<MemOpLink, 8> StoreNodes;
12706   // Find potential store merge candidates by searching through chain sub-DAG
12707   getStoreMergeCandidates(St, StoreNodes);
12708 
12709   // Check if there is anything to merge.
12710   if (StoreNodes.size() < 2)
12711     return false;
12712 
12713   // Sort the memory operands according to their distance from the
12714   // base pointer.
12715   std::sort(StoreNodes.begin(), StoreNodes.end(),
12716             [](MemOpLink LHS, MemOpLink RHS) {
12717               return LHS.OffsetFromBase < RHS.OffsetFromBase;
12718             });
12719 
12720   // Store Merge attempts to merge the lowest stores. This generally
12721   // works out as if successful, as the remaining stores are checked
12722   // after the first collection of stores is merged. However, in the
12723   // case that a non-mergeable store is found first, e.g., {p[-2],
12724   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
12725   // mergeable cases. To prevent this, we prune such stores from the
12726   // front of StoreNodes here.
12727 
12728   bool RV = false;
12729   while (StoreNodes.size() > 1) {
12730     unsigned StartIdx = 0;
12731     while ((StartIdx + 1 < StoreNodes.size()) &&
12732            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
12733                StoreNodes[StartIdx + 1].OffsetFromBase)
12734       ++StartIdx;
12735 
12736     // Bail if we don't have enough candidates to merge.
12737     if (StartIdx + 1 >= StoreNodes.size())
12738       return RV;
12739 
12740     if (StartIdx)
12741       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
12742 
12743     // Scan the memory operations on the chain and find the first
12744     // non-consecutive store memory address.
12745     unsigned NumConsecutiveStores = 1;
12746     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
12747     // Check that the addresses are consecutive starting from the second
12748     // element in the list of stores.
12749     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
12750       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
12751       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12752         break;
12753       NumConsecutiveStores = i + 1;
12754     }
12755 
12756     if (NumConsecutiveStores < 2) {
12757       StoreNodes.erase(StoreNodes.begin(),
12758                        StoreNodes.begin() + NumConsecutiveStores);
12759       continue;
12760     }
12761 
12762     // Check that we can merge these candidates without causing a cycle
12763     if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
12764                                                   NumConsecutiveStores)) {
12765       StoreNodes.erase(StoreNodes.begin(),
12766                        StoreNodes.begin() + NumConsecutiveStores);
12767       continue;
12768     }
12769 
12770     // The node with the lowest store address.
12771     LLVMContext &Context = *DAG.getContext();
12772     const DataLayout &DL = DAG.getDataLayout();
12773 
12774     // Store the constants into memory as one consecutive store.
12775     if (IsConstantSrc) {
12776       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12777       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12778       unsigned FirstStoreAlign = FirstInChain->getAlignment();
12779       unsigned LastLegalType = 1;
12780       unsigned LastLegalVectorType = 1;
12781       bool LastIntegerTrunc = false;
12782       bool NonZero = false;
12783       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12784         StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
12785         SDValue StoredVal = ST->getValue();
12786 
12787         if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
12788           NonZero |= !C->isNullValue();
12789         } else if (ConstantFPSDNode *C =
12790                        dyn_cast<ConstantFPSDNode>(StoredVal)) {
12791           NonZero |= !C->getConstantFPValue()->isNullValue();
12792         } else {
12793           // Non-constant.
12794           break;
12795         }
12796 
12797         // Find a legal type for the constant store.
12798         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
12799         EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12800         bool IsFast = false;
12801         if (TLI.isTypeLegal(StoreTy) &&
12802             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
12803             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12804                                    FirstStoreAlign, &IsFast) &&
12805             IsFast) {
12806           LastIntegerTrunc = false;
12807           LastLegalType = i + 1;
12808           // Or check whether a truncstore is legal.
12809         } else if (TLI.getTypeAction(Context, StoreTy) ==
12810                    TargetLowering::TypePromoteInteger) {
12811           EVT LegalizedStoredValueTy =
12812               TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
12813           if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12814               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
12815               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12816                                      FirstStoreAlign, &IsFast) &&
12817               IsFast) {
12818             LastIntegerTrunc = true;
12819             LastLegalType = i + 1;
12820           }
12821         }
12822 
12823         // We only use vectors if the constant is known to be zero or the target
12824         // allows it and the function is not marked with the noimplicitfloat
12825         // attribute.
12826         if ((!NonZero ||
12827              TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
12828             !NoVectors) {
12829           // Find a legal type for the vector store.
12830           unsigned Elts = i + 1;
12831           if (MemVT.isVector()) {
12832             // When merging vector stores, get the total number of elements.
12833             Elts *= MemVT.getVectorNumElements();
12834           }
12835           EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
12836           if (TLI.isTypeLegal(Ty) &&
12837               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
12838               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12839                                      FirstStoreAlign, &IsFast) &&
12840               IsFast)
12841             LastLegalVectorType = i + 1;
12842         }
12843       }
12844 
12845       // Check if we found a legal integer type that creates a meaningful merge.
12846       if (LastLegalType < 2 && LastLegalVectorType < 2) {
12847         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
12848         continue;
12849       }
12850 
12851       bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
12852       unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
12853 
12854       bool Merged = MergeStoresOfConstantsOrVecElts(
12855           StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
12856       if (!Merged) {
12857         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
12858         continue;
12859       }
12860       // Remove merged stores for next iteration.
12861       RV = true;
12862       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
12863       continue;
12864     }
12865 
12866     // When extracting multiple vector elements, try to store them
12867     // in one vector store rather than a sequence of scalar stores.
12868     if (IsExtractVecSrc) {
12869       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12870       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12871       unsigned FirstStoreAlign = FirstInChain->getAlignment();
12872       unsigned NumStoresToMerge = 1;
12873       bool IsVec = MemVT.isVector();
12874       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12875         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12876         unsigned StoreValOpcode = St->getValue().getOpcode();
12877         // This restriction could be loosened.
12878         // Bail out if any stored values are not elements extracted from a
12879         // vector. It should be possible to handle mixed sources, but load
12880         // sources need more careful handling (see the block of code below that
12881         // handles consecutive loads).
12882         if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
12883             StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
12884           return RV;
12885 
12886         // Find a legal type for the vector store.
12887         unsigned Elts = i + 1;
12888         if (IsVec) {
12889           // When merging vector stores, get the total number of elements.
12890           Elts *= MemVT.getVectorNumElements();
12891         }
12892         EVT Ty =
12893             EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12894         bool IsFast;
12895         if (TLI.isTypeLegal(Ty) &&
12896             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
12897             TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12898                                    FirstStoreAlign, &IsFast) &&
12899             IsFast)
12900           NumStoresToMerge = i + 1;
12901       }
12902 
12903       bool Merged = MergeStoresOfConstantsOrVecElts(
12904           StoreNodes, MemVT, NumStoresToMerge, false, true, false);
12905       if (!Merged) {
12906         StoreNodes.erase(StoreNodes.begin(),
12907                          StoreNodes.begin() + NumStoresToMerge);
12908         continue;
12909       }
12910       // Remove merged stores for next iteration.
12911       StoreNodes.erase(StoreNodes.begin(),
12912                        StoreNodes.begin() + NumStoresToMerge);
12913       RV = true;
12914       continue;
12915     }
12916 
12917     // Below we handle the case of multiple consecutive stores that
12918     // come from multiple consecutive loads. We merge them into a single
12919     // wide load and a single wide store.
12920 
12921     // Look for load nodes which are used by the stored values.
12922     SmallVector<MemOpLink, 8> LoadNodes;
12923 
12924     // Find acceptable loads. Loads need to have the same chain (token factor),
12925     // must not be zext, volatile, indexed, and they must be consecutive.
12926     BaseIndexOffset LdBasePtr;
12927     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12928       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12929       LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
12930       if (!Ld)
12931         break;
12932 
12933       // Loads must only have one use.
12934       if (!Ld->hasNUsesOfValue(1, 0))
12935         break;
12936 
12937       // The memory operands must not be volatile.
12938       if (Ld->isVolatile() || Ld->isIndexed())
12939         break;
12940 
12941       // We do not accept ext loads.
12942       if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
12943         break;
12944 
12945       // The stored memory type must be the same.
12946       if (Ld->getMemoryVT() != MemVT)
12947         break;
12948 
12949       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
12950       // If this is not the first ptr that we check.
12951       int64_t LdOffset = 0;
12952       if (LdBasePtr.getBase().getNode()) {
12953         // The base ptr must be the same.
12954         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
12955           break;
12956       } else {
12957         // Check that all other base pointers are the same as this one.
12958         LdBasePtr = LdPtr;
12959       }
12960 
12961       // We found a potential memory operand to merge.
12962       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
12963     }
12964 
12965     if (LoadNodes.size() < 2) {
12966       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
12967       continue;
12968     }
12969 
12970     // If we have load/store pair instructions and we only have two values,
12971     // don't bother merging.
12972     unsigned RequiredAlignment;
12973     if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
12974         StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
12975       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
12976       continue;
12977     }
12978     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12979     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12980     unsigned FirstStoreAlign = FirstInChain->getAlignment();
12981     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
12982     unsigned FirstLoadAS = FirstLoad->getAddressSpace();
12983     unsigned FirstLoadAlign = FirstLoad->getAlignment();
12984 
12985     // Scan the memory operations on the chain and find the first
12986     // non-consecutive load memory address. These variables hold the index in
12987     // the store node array.
12988     unsigned LastConsecutiveLoad = 1;
12989     // This variable refers to the size and not index in the array.
12990     unsigned LastLegalVectorType = 1;
12991     unsigned LastLegalIntegerType = 1;
12992     bool isDereferenceable = true;
12993     bool DoIntegerTruncate = false;
12994     StartAddress = LoadNodes[0].OffsetFromBase;
12995     SDValue FirstChain = FirstLoad->getChain();
12996     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
12997       // All loads must share the same chain.
12998       if (LoadNodes[i].MemNode->getChain() != FirstChain)
12999         break;
13000 
13001       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
13002       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
13003         break;
13004       LastConsecutiveLoad = i;
13005 
13006       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
13007         isDereferenceable = false;
13008 
13009       // Find a legal type for the vector store.
13010       EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
13011       bool IsFastSt, IsFastLd;
13012       if (TLI.isTypeLegal(StoreTy) &&
13013           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13014           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13015                                  FirstStoreAlign, &IsFastSt) &&
13016           IsFastSt &&
13017           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13018                                  FirstLoadAlign, &IsFastLd) &&
13019           IsFastLd) {
13020         LastLegalVectorType = i + 1;
13021       }
13022 
13023       // Find a legal type for the integer store.
13024       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
13025       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
13026       if (TLI.isTypeLegal(StoreTy) &&
13027           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13028           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13029                                  FirstStoreAlign, &IsFastSt) &&
13030           IsFastSt &&
13031           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13032                                  FirstLoadAlign, &IsFastLd) &&
13033           IsFastLd) {
13034         LastLegalIntegerType = i + 1;
13035         DoIntegerTruncate = false;
13036         // Or check whether a truncstore and extload is legal.
13037       } else if (TLI.getTypeAction(Context, StoreTy) ==
13038                  TargetLowering::TypePromoteInteger) {
13039         EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
13040         if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
13041             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
13042             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
13043                                StoreTy) &&
13044             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
13045                                StoreTy) &&
13046             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
13047             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13048                                    FirstStoreAlign, &IsFastSt) &&
13049             IsFastSt &&
13050             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13051                                    FirstLoadAlign, &IsFastLd) &&
13052             IsFastLd) {
13053           LastLegalIntegerType = i + 1;
13054           DoIntegerTruncate = true;
13055         }
13056       }
13057     }
13058 
13059     // Only use vector types if the vector type is larger than the integer type.
13060     // If they are the same, use integers.
13061     bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
13062     unsigned LastLegalType =
13063         std::max(LastLegalVectorType, LastLegalIntegerType);
13064 
13065     // We add +1 here because the LastXXX variables refer to location while
13066     // the NumElem refers to array/index size.
13067     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
13068     NumElem = std::min(LastLegalType, NumElem);
13069 
13070     if (NumElem < 2) {
13071       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
13072       continue;
13073     }
13074 
13075     // Find if it is better to use vectors or integers to load and store
13076     // to memory.
13077     EVT JointMemOpVT;
13078     if (UseVectorTy) {
13079       JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
13080     } else {
13081       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
13082       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
13083     }
13084 
13085     SDLoc LoadDL(LoadNodes[0].MemNode);
13086     SDLoc StoreDL(StoreNodes[0].MemNode);
13087 
13088     // The merged loads are required to have the same incoming chain, so
13089     // using the first's chain is acceptable.
13090 
13091     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
13092     AddToWorklist(NewStoreChain.getNode());
13093 
13094     MachineMemOperand::Flags MMOFlags = isDereferenceable ?
13095                                           MachineMemOperand::MODereferenceable:
13096                                           MachineMemOperand::MONone;
13097 
13098     SDValue NewLoad, NewStore;
13099     if (UseVectorTy || !DoIntegerTruncate) {
13100       NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
13101                             FirstLoad->getBasePtr(),
13102                             FirstLoad->getPointerInfo(), FirstLoadAlign,
13103                             MMOFlags);
13104       NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
13105                               FirstInChain->getBasePtr(),
13106                               FirstInChain->getPointerInfo(), FirstStoreAlign);
13107     } else { // This must be the truncstore/extload case
13108       EVT ExtendedTy =
13109           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
13110       NewLoad =
13111           DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
13112                          FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
13113                          JointMemOpVT, FirstLoadAlign, MMOFlags);
13114       NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
13115                                    FirstInChain->getBasePtr(),
13116                                    FirstInChain->getPointerInfo(), JointMemOpVT,
13117                                    FirstInChain->getAlignment(),
13118                                    FirstInChain->getMemOperand()->getFlags());
13119     }
13120 
13121     // Transfer chain users from old loads to the new load.
13122     for (unsigned i = 0; i < NumElem; ++i) {
13123       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
13124       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
13125                                     SDValue(NewLoad.getNode(), 1));
13126     }
13127 
    // Replace all of the stores with the new store.
13129     for (unsigned i = 0; i < NumElem; ++i)
13130       CombineTo(StoreNodes[i].MemNode, NewStore);
13131     RV = true;
13132     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13133     continue;
13134   }
13135   return RV;
13136 }
13137 
13138 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
13139   SDLoc SL(ST);
13140   SDValue ReplStore;
13141 
13142   // Replace the chain to avoid dependency.
13143   if (ST->isTruncatingStore()) {
13144     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
13145                                   ST->getBasePtr(), ST->getMemoryVT(),
13146                                   ST->getMemOperand());
13147   } else {
13148     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
13149                              ST->getMemOperand());
13150   }
13151 
13152   // Create token to keep both nodes around.
13153   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
13154                               MVT::Other, ST->getChain(), ReplStore);
13155 
13156   // Make sure the new and old chains are cleaned up.
13157   AddToWorklist(Token.getNode());
13158 
13159   // Don't add users to work list.
13160   return CombineTo(ST, Token, false);
13161 }
13162 
13163 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
13164   SDValue Value = ST->getValue();
13165   if (Value.getOpcode() == ISD::TargetConstantFP)
13166     return SDValue();
13167 
13168   SDLoc DL(ST);
13169 
13170   SDValue Chain = ST->getChain();
13171   SDValue Ptr = ST->getBasePtr();
13172 
13173   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
13174 
13175   // NOTE: If the original store is volatile, this transform must not increase
13176   // the number of stores.  For example, on x86-32 an f64 can be stored in one
13177   // processor operation but an i64 (which is not legal) requires two.  So the
13178   // transform should not be done in this case.
13179 
13180   SDValue Tmp;
13181   switch (CFP->getSimpleValueType(0).SimpleTy) {
13182   default:
13183     llvm_unreachable("Unknown FP type");
13184   case MVT::f16:    // We don't do this for these yet.
13185   case MVT::f80:
13186   case MVT::f128:
13187   case MVT::ppcf128:
13188     return SDValue();
13189   case MVT::f32:
13190     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
13191         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13192       ;
13193       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
13194                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
13195                             MVT::i32);
13196       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
13197     }
13198 
13199     return SDValue();
13200   case MVT::f64:
13201     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
13202          !ST->isVolatile()) ||
13203         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
13204       ;
13205       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
13206                             getZExtValue(), SDLoc(CFP), MVT::i64);
13207       return DAG.getStore(Chain, DL, Tmp,
13208                           Ptr, ST->getMemOperand());
13209     }
13210 
13211     if (!ST->isVolatile() &&
13212         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13213       // Many FP stores are not made apparent until after legalize, e.g. for
13214       // argument passing.  Since this is so common, custom legalize the
13215       // 64-bit integer store into two 32-bit stores.
13216       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
13217       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
13218       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
13219       if (DAG.getDataLayout().isBigEndian())
13220         std::swap(Lo, Hi);
13221 
13222       unsigned Alignment = ST->getAlignment();
13223       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13224       AAMDNodes AAInfo = ST->getAAInfo();
13225 
13226       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13227                                  ST->getAlignment(), MMOFlags, AAInfo);
13228       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13229                         DAG.getConstant(4, DL, Ptr.getValueType()));
13230       Alignment = MinAlign(Alignment, 4U);
13231       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
13232                                  ST->getPointerInfo().getWithOffset(4),
13233                                  Alignment, MMOFlags, AAInfo);
13234       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13235                          St0, St1);
13236     }
13237 
13238     return SDValue();
13239   }
13240 }
13241 
/// Combine a STORE node.
///
/// The combines below are attempted in order and the first one that applies
/// wins: store the source of a bitcast, drop a store of undef, raise the
/// recorded alignment, turn an FP load/store pair into integer ops, find a
/// better chain, narrow the input of a truncating store, remove dead or
/// redundant stores, fold FP_ROUND/TRUNCATE into a truncating store, merge
/// consecutive stores, form indexed stores, replace FP-constant stores, split
/// merged-value stores and finally reduce load-op-store width.
///
/// \param N the node being visited; it is cast to StoreSDNode.
/// \returns a replacement value; SDValue(N, 0) when the DAG has already been
///          updated by the combine itself; an empty SDValue after the
///          preceding store was elided; otherwise whatever
///          ReduceLoadOpStoreWidth returns.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      // Only rewrite if the target allows a store of the source type at the
      // original alignment and reports it as fast.
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                            ST->getPointerInfo(), OrigAlign,
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment()) {
        // Rebuild the store with the larger alignment; getTruncStore is
        // called with the store's own memory VT.
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // Only replace N if a different node came back.
        if (NewStore.getNode() != N)
          return CombineTo(ST, NewStore, true);
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // Re-read the chain from the store before continuing.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter = DAG.GetDemandedBits(
        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                    ST->getMemoryVT().getScalarSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(
            Value,
            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                 ST->getMemoryVT().getScalarSizeInBits()))) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted). SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // The store's chain operand is itself a store: look for redundancy between
  // the two when both are unindexed, non-volatile, use the same base pointer
  // and have the same memory VT.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
        ST->getMemoryVT() == ST1->getMemoryVT()) {
      // If this is a store followed by a store with the same value to the same
      // location, then the store is dead/noop.
      if (ST1->getValue() == Value) {
        // The store is dead, remove it.
        return Chain;
      }

      // If this store is preceded by a store to the same location and no
      // other node is chained to that preceding store, we can effectively
      // drop the preceding store. Do not remove stores to undef as they may
      // be used as data sinks.
      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        // ST1 is fully overwritten and can be elided. Combine with its chain
        // value.
        CombineTo(ST1, ST1->getChain());
        return SDValue();
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only attempt store merging at one point, because we don't want to perform
  // this optimization on every DAGCombine invocation: at the AfterLegalizeDAG
  // level if the target asks for merging after legalization, otherwise only
  // before the types are legal.
  if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG
                                           : !LegalTypes) {
    for (;;) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  // Try splitting a store of two values merged with shift/or into two stores.
  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  // Last resort: hand the node to ReduceLoadOpStoreWidth and return its
  // result.
  return ReduceLoadOpStoreWidth(N);
}
13416 
13417 /// For the instruction sequence of store below, F and I values
13418 /// are bundled together as an i64 value before being stored into memory.
13419 /// Sometimes it is more efficent to generate separate stores for F and I,
13420 /// which can remove the bitwise instructions or sink them to colder places.
13421 ///
13422 ///   (store (or (zext (bitcast F to i32) to i64),
13423 ///              (shl (zext I to i64), 32)), addr)  -->
13424 ///   (store F, addr) and (store I, addr+4)
13425 ///
13426 /// Similarly, splitting for other merged store can also be beneficial, like:
13427 /// For pair of {i32, i32}, i64 store --> two i32 stores.
13428 /// For pair of {i32, i16}, i64 store --> two i32 stores.
13429 /// For pair of {i16, i16}, i32 store --> two i16 stores.
13430 /// For pair of {i16, i8},  i32 store --> two i16 stores.
13431 /// For pair of {i8, i8},   i16 store --> two i8 stores.
13432 ///
13433 /// We allow each target to determine specifically which kind of splitting is
13434 /// supported.
13435 ///
13436 /// The store patterns are commonly seen from the simple code snippet below
13437 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
13438 ///   void goo(const std::pair<int, float> &);
13439 ///   hoo() {
13440 ///     ...
13441 ///     goo(std::make_pair(tmp, ftmp));
13442 ///     ...
13443 ///   }
13444 ///
13445 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
13446   if (OptLevel == CodeGenOpt::None)
13447     return SDValue();
13448 
13449   SDValue Val = ST->getValue();
13450   SDLoc DL(ST);
13451 
13452   // Match OR operand.
13453   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
13454     return SDValue();
13455 
13456   // Match SHL operand and get Lower and Higher parts of Val.
13457   SDValue Op1 = Val.getOperand(0);
13458   SDValue Op2 = Val.getOperand(1);
13459   SDValue Lo, Hi;
13460   if (Op1.getOpcode() != ISD::SHL) {
13461     std::swap(Op1, Op2);
13462     if (Op1.getOpcode() != ISD::SHL)
13463       return SDValue();
13464   }
13465   Lo = Op2;
13466   Hi = Op1.getOperand(0);
13467   if (!Op1.hasOneUse())
13468     return SDValue();
13469 
13470   // Match shift amount to HalfValBitSize.
13471   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
13472   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
13473   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
13474     return SDValue();
13475 
13476   // Lo and Hi are zero-extended from int with size less equal than 32
13477   // to i64.
13478   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
13479       !Lo.getOperand(0).getValueType().isScalarInteger() ||
13480       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
13481       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
13482       !Hi.getOperand(0).getValueType().isScalarInteger() ||
13483       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
13484     return SDValue();
13485 
13486   // Use the EVT of low and high parts before bitcast as the input
13487   // of target query.
13488   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
13489                   ? Lo.getOperand(0).getValueType()
13490                   : Lo.getValueType();
13491   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
13492                    ? Hi.getOperand(0).getValueType()
13493                    : Hi.getValueType();
13494   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
13495     return SDValue();
13496 
13497   // Start to split store.
13498   unsigned Alignment = ST->getAlignment();
13499   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13500   AAMDNodes AAInfo = ST->getAAInfo();
13501 
13502   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
13503   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
13504   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
13505   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
13506 
13507   SDValue Chain = ST->getChain();
13508   SDValue Ptr = ST->getBasePtr();
13509   // Lower value store.
13510   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13511                              ST->getAlignment(), MMOFlags, AAInfo);
13512   Ptr =
13513       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13514                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
13515   // Higher value store.
13516   SDValue St1 =
13517       DAG.getStore(St0, DL, Hi, Ptr,
13518                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
13519                    Alignment / 2, MMOFlags, AAInfo);
13520   return St1;
13521 }
13522 
13523 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
13524   SDValue InVec = N->getOperand(0);
13525   SDValue InVal = N->getOperand(1);
13526   SDValue EltNo = N->getOperand(2);
13527   SDLoc DL(N);
13528 
13529   // If the inserted element is an UNDEF, just use the input vector.
13530   if (InVal.isUndef())
13531     return InVec;
13532 
13533   EVT VT = InVec.getValueType();
13534 
13535   // Remove redundant insertions:
13536   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
13537   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13538       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
13539     return InVec;
13540 
13541   // Check that we know which element is being inserted
13542   if (!isa<ConstantSDNode>(EltNo))
13543     return SDValue();
13544   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13545 
13546   // Canonicalize insert_vector_elt dag nodes.
13547   // Example:
13548   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
13549   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
13550   //
13551   // Do this only if the child insert_vector node has one use; also
13552   // do this only if indices are both constants and Idx1 < Idx0.
13553   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
13554       && isa<ConstantSDNode>(InVec.getOperand(2))) {
13555     unsigned OtherElt = InVec.getConstantOperandVal(2);
13556     if (Elt < OtherElt) {
13557       // Swap nodes.
13558       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
13559                                   InVec.getOperand(0), InVal, EltNo);
13560       AddToWorklist(NewOp.getNode());
13561       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
13562                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
13563     }
13564   }
13565 
13566   // If we can't generate a legal BUILD_VECTOR, exit
13567   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
13568     return SDValue();
13569 
13570   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
13571   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
13572   // vector elements.
13573   SmallVector<SDValue, 8> Ops;
13574   // Do not combine these two vectors if the output vector will not replace
13575   // the input vector.
13576   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
13577     Ops.append(InVec.getNode()->op_begin(),
13578                InVec.getNode()->op_end());
13579   } else if (InVec.isUndef()) {
13580     unsigned NElts = VT.getVectorNumElements();
13581     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
13582   } else {
13583     return SDValue();
13584   }
13585 
13586   // Insert the element
13587   if (Elt < Ops.size()) {
13588     // All the operands of BUILD_VECTOR must have the same type;
13589     // we enforce that here.
13590     EVT OpVT = Ops[0].getValueType();
13591     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
13592   }
13593 
13594   // Return the new vector
13595   return DAG.getBuildVector(VT, DL, Ops);
13596 }
13597 
13598 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
13599     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
13600   assert(!OriginalLoad->isVolatile());
13601 
13602   EVT ResultVT = EVE->getValueType(0);
13603   EVT VecEltVT = InVecVT.getVectorElementType();
13604   unsigned Align = OriginalLoad->getAlignment();
13605   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
13606       VecEltVT.getTypeForEVT(*DAG.getContext()));
13607 
13608   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
13609     return SDValue();
13610 
13611   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
13612     ISD::NON_EXTLOAD : ISD::EXTLOAD;
13613   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
13614     return SDValue();
13615 
13616   Align = NewAlign;
13617 
13618   SDValue NewPtr = OriginalLoad->getBasePtr();
13619   SDValue Offset;
13620   EVT PtrType = NewPtr.getValueType();
13621   MachinePointerInfo MPI;
13622   SDLoc DL(EVE);
13623   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
13624     int Elt = ConstEltNo->getZExtValue();
13625     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
13626     Offset = DAG.getConstant(PtrOff, DL, PtrType);
13627     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
13628   } else {
13629     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
13630     Offset = DAG.getNode(
13631         ISD::MUL, DL, PtrType, Offset,
13632         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
13633     MPI = OriginalLoad->getPointerInfo();
13634   }
13635   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
13636 
13637   // The replacement we need to do here is a little tricky: we need to
13638   // replace an extractelement of a load with a load.
13639   // Use ReplaceAllUsesOfValuesWith to do the replacement.
13640   // Note that this replacement assumes that the extractvalue is the only
13641   // use of the load; that's okay because we don't want to perform this
13642   // transformation in other cases anyway.
13643   SDValue Load;
13644   SDValue Chain;
13645   if (ResultVT.bitsGT(VecEltVT)) {
13646     // If the result type of vextract is wider than the load, then issue an
13647     // extending load instead.
13648     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
13649                                                   VecEltVT)
13650                                    ? ISD::ZEXTLOAD
13651                                    : ISD::EXTLOAD;
13652     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
13653                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
13654                           Align, OriginalLoad->getMemOperand()->getFlags(),
13655                           OriginalLoad->getAAInfo());
13656     Chain = Load.getValue(1);
13657   } else {
13658     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
13659                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
13660                        OriginalLoad->getAAInfo());
13661     Chain = Load.getValue(1);
13662     if (ResultVT.bitsLT(VecEltVT))
13663       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
13664     else
13665       Load = DAG.getBitcast(ResultVT, Load);
13666   }
13667   WorklistRemover DeadNodes(*this);
13668   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
13669   SDValue To[] = { Load, Chain };
13670   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
13671   // Since we're explicitly calling ReplaceAllUses, add the new node to the
13672   // worklist explicitly as well.
13673   AddToWorklist(Load.getNode());
13674   AddUsersToWorklist(Load.getNode()); // Add users too
13675   // Make sure to revisit this node to clean it up; it will usually be dead.
13676   AddToWorklist(EVE);
13677   ++OpsNarrowed;
13678   return SDValue(EVE, 0);
13679 }
13680 
13681 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
13682   // (vextract (scalar_to_vector val, 0) -> val
13683   SDValue InVec = N->getOperand(0);
13684   EVT VT = InVec.getValueType();
13685   EVT NVT = N->getValueType(0);
13686 
13687   if (InVec.isUndef())
13688     return DAG.getUNDEF(NVT);
13689 
13690   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
13691     // Check if the result type doesn't match the inserted element type. A
13692     // SCALAR_TO_VECTOR may truncate the inserted element and the
13693     // EXTRACT_VECTOR_ELT may widen the extracted vector.
13694     SDValue InOp = InVec.getOperand(0);
13695     if (InOp.getValueType() != NVT) {
13696       assert(InOp.getValueType().isInteger() && NVT.isInteger());
13697       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
13698     }
13699     return InOp;
13700   }
13701 
13702   SDValue EltNo = N->getOperand(1);
13703   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
13704 
13705   // extract_vector_elt (build_vector x, y), 1 -> y
13706   if (ConstEltNo &&
13707       InVec.getOpcode() == ISD::BUILD_VECTOR &&
13708       TLI.isTypeLegal(VT) &&
13709       (InVec.hasOneUse() ||
13710        TLI.aggressivelyPreferBuildVectorSources(VT))) {
13711     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
13712     EVT InEltVT = Elt.getValueType();
13713 
13714     // Sometimes build_vector's scalar input types do not match result type.
13715     if (NVT == InEltVT)
13716       return Elt;
13717 
13718     // TODO: It may be useful to truncate if free if the build_vector implicitly
13719     // converts.
13720   }
13721 
13722   // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
13723   bool isLE = DAG.getDataLayout().isLittleEndian();
13724   unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
13725   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
13726       ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
13727     SDValue BCSrc = InVec.getOperand(0);
13728     if (BCSrc.getValueType().isScalarInteger())
13729       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
13730   }
13731 
13732   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
13733   //
13734   // This only really matters if the index is non-constant since other combines
13735   // on the constant elements already work.
13736   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
13737       EltNo == InVec.getOperand(2)) {
13738     SDValue Elt = InVec.getOperand(1);
13739     return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
13740   }
13741 
13742   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
13743   // We only perform this optimization before the op legalization phase because
13744   // we may introduce new vector instructions which are not backed by TD
13745   // patterns. For example on AVX, extracting elements from a wide vector
13746   // without using extract_subvector. However, if we can find an underlying
13747   // scalar value, then we can always use that.
13748   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
13749     int NumElem = VT.getVectorNumElements();
13750     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
13751     // Find the new index to extract from.
13752     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
13753 
13754     // Extracting an undef index is undef.
13755     if (OrigElt == -1)
13756       return DAG.getUNDEF(NVT);
13757 
13758     // Select the right vector half to extract from.
13759     SDValue SVInVec;
13760     if (OrigElt < NumElem) {
13761       SVInVec = InVec->getOperand(0);
13762     } else {
13763       SVInVec = InVec->getOperand(1);
13764       OrigElt -= NumElem;
13765     }
13766 
13767     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
13768       SDValue InOp = SVInVec.getOperand(OrigElt);
13769       if (InOp.getValueType() != NVT) {
13770         assert(InOp.getValueType().isInteger() && NVT.isInteger());
13771         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
13772       }
13773 
13774       return InOp;
13775     }
13776 
13777     // FIXME: We should handle recursing on other vector shuffles and
13778     // scalar_to_vector here as well.
13779 
13780     if (!LegalOperations) {
13781       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
13782       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
13783                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
13784     }
13785   }
13786 
13787   bool BCNumEltsChanged = false;
13788   EVT ExtVT = VT.getVectorElementType();
13789   EVT LVT = ExtVT;
13790 
13791   // If the result of load has to be truncated, then it's not necessarily
13792   // profitable.
13793   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
13794     return SDValue();
13795 
13796   if (InVec.getOpcode() == ISD::BITCAST) {
13797     // Don't duplicate a load with other uses.
13798     if (!InVec.hasOneUse())
13799       return SDValue();
13800 
13801     EVT BCVT = InVec.getOperand(0).getValueType();
13802     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
13803       return SDValue();
13804     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
13805       BCNumEltsChanged = true;
13806     InVec = InVec.getOperand(0);
13807     ExtVT = BCVT.getVectorElementType();
13808   }
13809 
13810   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
13811   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
13812       ISD::isNormalLoad(InVec.getNode()) &&
13813       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
13814     SDValue Index = N->getOperand(1);
13815     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
13816       if (!OrigLoad->isVolatile()) {
13817         return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
13818                                                              OrigLoad);
13819       }
13820     }
13821   }
13822 
13823   // Perform only after legalization to ensure build_vector / vector_shuffle
13824   // optimizations have already been done.
13825   if (!LegalOperations) return SDValue();
13826 
13827   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
13828   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
13829   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
13830 
13831   if (ConstEltNo) {
13832     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13833 
13834     LoadSDNode *LN0 = nullptr;
13835     const ShuffleVectorSDNode *SVN = nullptr;
13836     if (ISD::isNormalLoad(InVec.getNode())) {
13837       LN0 = cast<LoadSDNode>(InVec);
13838     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
13839                InVec.getOperand(0).getValueType() == ExtVT &&
13840                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
13841       // Don't duplicate a load with other uses.
13842       if (!InVec.hasOneUse())
13843         return SDValue();
13844 
13845       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
13846     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
13847       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
13848       // =>
13849       // (load $addr+1*size)
13850 
13851       // Don't duplicate a load with other uses.
13852       if (!InVec.hasOneUse())
13853         return SDValue();
13854 
13855       // If the bit convert changed the number of elements, it is unsafe
13856       // to examine the mask.
13857       if (BCNumEltsChanged)
13858         return SDValue();
13859 
13860       // Select the input vector, guarding against out of range extract vector.
13861       unsigned NumElems = VT.getVectorNumElements();
13862       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
13863       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
13864 
13865       if (InVec.getOpcode() == ISD::BITCAST) {
13866         // Don't duplicate a load with other uses.
13867         if (!InVec.hasOneUse())
13868           return SDValue();
13869 
13870         InVec = InVec.getOperand(0);
13871       }
13872       if (ISD::isNormalLoad(InVec.getNode())) {
13873         LN0 = cast<LoadSDNode>(InVec);
13874         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
13875         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
13876       }
13877     }
13878 
13879     // Make sure we found a non-volatile load and the extractelement is
13880     // the only use.
13881     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
13882       return SDValue();
13883 
13884     // If Idx was -1 above, Elt is going to be -1, so just return undef.
13885     if (Elt == -1)
13886       return DAG.getUNDEF(LVT);
13887 
13888     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
13889   }
13890 
13891   return SDValue();
13892 }
13893 
13894 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
13895 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
13896   // We perform this optimization post type-legalization because
13897   // the type-legalizer often scalarizes integer-promoted vectors.
13898   // Performing this optimization before may create bit-casts which
13899   // will be type-legalized to complex code sequences.
13900   // We perform this optimization only before the operation legalizer because we
13901   // may introduce illegal operations.
13902   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
13903     return SDValue();
13904 
13905   unsigned NumInScalars = N->getNumOperands();
13906   SDLoc DL(N);
13907   EVT VT = N->getValueType(0);
13908 
13909   // Check to see if this is a BUILD_VECTOR of a bunch of values
13910   // which come from any_extend or zero_extend nodes. If so, we can create
13911   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
13912   // optimizations. We do not handle sign-extend because we can't fill the sign
13913   // using shuffles.
13914   EVT SourceType = MVT::Other;
13915   bool AllAnyExt = true;
13916 
13917   for (unsigned i = 0; i != NumInScalars; ++i) {
13918     SDValue In = N->getOperand(i);
13919     // Ignore undef inputs.
13920     if (In.isUndef()) continue;
13921 
13922     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
13923     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
13924 
13925     // Abort if the element is not an extension.
13926     if (!ZeroExt && !AnyExt) {
13927       SourceType = MVT::Other;
13928       break;
13929     }
13930 
13931     // The input is a ZeroExt or AnyExt. Check the original type.
13932     EVT InTy = In.getOperand(0).getValueType();
13933 
13934     // Check that all of the widened source types are the same.
13935     if (SourceType == MVT::Other)
13936       // First time.
13937       SourceType = InTy;
13938     else if (InTy != SourceType) {
13939       // Multiple income types. Abort.
13940       SourceType = MVT::Other;
13941       break;
13942     }
13943 
13944     // Check if all of the extends are ANY_EXTENDs.
13945     AllAnyExt &= AnyExt;
13946   }
13947 
13948   // In order to have valid types, all of the inputs must be extended from the
13949   // same source type and all of the inputs must be any or zero extend.
13950   // Scalar sizes must be a power of two.
13951   EVT OutScalarTy = VT.getScalarType();
13952   bool ValidTypes = SourceType != MVT::Other &&
13953                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
13954                  isPowerOf2_32(SourceType.getSizeInBits());
13955 
13956   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
13957   // turn into a single shuffle instruction.
13958   if (!ValidTypes)
13959     return SDValue();
13960 
13961   bool isLE = DAG.getDataLayout().isLittleEndian();
13962   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
13963   assert(ElemRatio > 1 && "Invalid element size ratio");
13964   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
13965                                DAG.getConstant(0, DL, SourceType);
13966 
13967   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
13968   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
13969 
13970   // Populate the new build_vector
13971   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
13972     SDValue Cast = N->getOperand(i);
13973     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
13974             Cast.getOpcode() == ISD::ZERO_EXTEND ||
13975             Cast.isUndef()) && "Invalid cast opcode");
13976     SDValue In;
13977     if (Cast.isUndef())
13978       In = DAG.getUNDEF(SourceType);
13979     else
13980       In = Cast->getOperand(0);
13981     unsigned Index = isLE ? (i * ElemRatio) :
13982                             (i * ElemRatio + (ElemRatio - 1));
13983 
13984     assert(Index < Ops.size() && "Invalid index");
13985     Ops[Index] = In;
13986   }
13987 
13988   // The type of the new BUILD_VECTOR node.
13989   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
13990   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
13991          "Invalid vector size");
13992   // Check if the new vector type is legal.
13993   if (!isTypeLegal(VecVT)) return SDValue();
13994 
13995   // Make the new BUILD_VECTOR.
13996   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
13997 
13998   // The new BUILD_VECTOR node has the potential to be further optimized.
13999   AddToWorklist(BV.getNode());
14000   // Bitcast to the desired type.
14001   return DAG.getBitcast(VT, BV);
14002 }
14003 
14004 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
14005   EVT VT = N->getValueType(0);
14006 
14007   unsigned NumInScalars = N->getNumOperands();
14008   SDLoc DL(N);
14009 
14010   EVT SrcVT = MVT::Other;
14011   unsigned Opcode = ISD::DELETED_NODE;
14012   unsigned NumDefs = 0;
14013 
14014   for (unsigned i = 0; i != NumInScalars; ++i) {
14015     SDValue In = N->getOperand(i);
14016     unsigned Opc = In.getOpcode();
14017 
14018     if (Opc == ISD::UNDEF)
14019       continue;
14020 
14021     // If all scalar values are floats and converted from integers.
14022     if (Opcode == ISD::DELETED_NODE &&
14023         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
14024       Opcode = Opc;
14025     }
14026 
14027     if (Opc != Opcode)
14028       return SDValue();
14029 
14030     EVT InVT = In.getOperand(0).getValueType();
14031 
14032     // If all scalar values are typed differently, bail out. It's chosen to
14033     // simplify BUILD_VECTOR of integer types.
14034     if (SrcVT == MVT::Other)
14035       SrcVT = InVT;
14036     if (SrcVT != InVT)
14037       return SDValue();
14038     NumDefs++;
14039   }
14040 
14041   // If the vector has just one element defined, it's not worth to fold it into
14042   // a vectorized one.
14043   if (NumDefs < 2)
14044     return SDValue();
14045 
14046   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
14047          && "Should only handle conversion from integer to float.");
14048   assert(SrcVT != MVT::Other && "Cannot determine source type!");
14049 
14050   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
14051 
14052   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
14053     return SDValue();
14054 
14055   // Just because the floating-point vector type is legal does not necessarily
14056   // mean that the corresponding integer vector type is.
14057   if (!isTypeLegal(NVT))
14058     return SDValue();
14059 
14060   SmallVector<SDValue, 8> Opnds;
14061   for (unsigned i = 0; i != NumInScalars; ++i) {
14062     SDValue In = N->getOperand(i);
14063 
14064     if (In.isUndef())
14065       Opnds.push_back(DAG.getUNDEF(SrcVT));
14066     else
14067       Opnds.push_back(In.getOperand(0));
14068   }
14069   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
14070   AddToWorklist(BV.getNode());
14071 
14072   return DAG.getNode(Opcode, DL, VT, BV);
14073 }
14074 
// Build the shuffle that moves the elements BUILD_VECTOR node N takes from
// VecIn1 and VecIn2 into their final lanes.
//
// VectorMask holds, for each element of N, the number of the input vector
// that element is extracted from. Entries equal to LeftIdx refer to VecIn1
// and entries equal to LeftIdx + 1 refer to VecIn2; every other entry is left
// undef in the generated shuffle mask. VecIn2 may be an empty SDValue, in
// which case an undef vector is used as the second shuffle input.
//
// When the input types differ from the type of N, the inputs are first
// concatenated, padded or split so that a shuffle can be formed. An empty
// SDValue is returned when the types cannot be reconciled.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  // Vec2Offset is the shuffle-mask index at which the elements of the second
  // input start. ShuffleNumElems is the element count of the shuffle itself,
  // which may end up wider than the output.
  unsigned Vec2Offset = InVT1.getVectorNumElements();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      // Both inputs now live in VecIn1; the second one still starts at
      // Vec2Offset within the concatenation.
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      // VecIn1 is exactly twice as wide as the output.
      if (!TLI.isExtractSubvectorCheap(VT, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        // The shuffle is performed at the width of VecIn1; the low half is
        // extracted at the end of this function.
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // VecIn1 already has the output width and VecIn2 is half as wide: widen
      // VecIn2 by concatenating it with an undef vector of its own type.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // Non-positive entries do not refer to an input vector, so there is
    // nothing to shuffle into this lane.
    if (VectorMask[i] <= 0)
      continue;

    // Operand 1 of the BUILD_VECTOR operand is read as the constant element
    // index within its source vector.
    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type of the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  // If the shuffle was done at a wider type, keep only the low subvector.
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
14187 
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
//
// Operands may also be undef or zero constants. The result is either a single
// shuffle or, when more than two "ingredient" vectors are involved, a binary
// tree of blending shuffles. Returns an empty SDValue if N does not match or
// if one of the required shuffles cannot be created.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is a placeholder so that real input vectors are numbered from 1,
  // leaving number 0 free to denote the zero vector.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    // Undef operands keep their initial mask value of -1.
    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with a constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();

    SDValue ExtractedFromVec = Op.getOperand(0);

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  // (VecIn always contains the placeholder, hence the comparison with 2.)
  if (VecIn.size() < 2)
    return SDValue();

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors have illegal types
  // (like it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  // VecIn holds the placeholder plus the real inputs, so VecIn.size() / 2 is
  // the number of input pairs with a trailing unpaired input counted as a
  // pair of its own.
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    // The last pair has no right-hand vector when the number of real inputs
    // is odd; an empty SDValue is passed in that case.
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  // Input vector k (numbered from 1) ended up in shuffle (k - 1) / 2, the
  // zero vector is the last entry of Shuffles, and undef entries stay at -1
  // because (-1 - 1) / 2 == -1.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      Vec = Shuffles.size() - 1;
    else
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  // Each round blends adjacent pairs in place, halving the number of live
  // entries at the front of Shuffles.
  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    if (CurSize % 2) {
      // An odd level is padded with undef. Slot CurSize exists because the
      // list started with an even size and CurSize has only shrunk since.
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      // Keep lane i from whichever of the two vectors currently provides it,
      // and renumber that lane to the blended vector's slot.
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }

  return Shuffles[0];
}
14350 
14351 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
14352 // operations which can be matched to a truncate or to a shuffle-truncate.
14353 SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) {
14354   // TODO: Add support for big-endian.
14355   if (DAG.getDataLayout().isBigEndian())
14356     return SDValue();
14357   if (N->getNumOperands() < 2)
14358     return SDValue();
14359   SDLoc DL(N);
14360   EVT VT = N->getValueType(0);
14361   unsigned NumElems = N->getNumOperands();
14362 
14363   if (!isTypeLegal(VT))
14364     return SDValue();
14365 
14366   // If the input is something other than an EXTRACT_VECTOR_ELT with a constant
14367   // index, bail out.
14368   // TODO: Allow undef elements in some cases?
14369   if (any_of(N->ops(), [VT](SDValue Op) {
14370         return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14371                !isa<ConstantSDNode>(Op.getOperand(1)) ||
14372                Op.getValueType() != VT.getVectorElementType();
14373       }))
14374     return SDValue();
14375 
14376   // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index
14377   auto GetExtractIdx = [](SDValue Extract) {
14378     return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue();
14379   };
14380 
14381   // The offset is defined to be the BUILD_VECTOR's first operand (assuming no
14382   // undef and little-endian).
14383   int Offset = GetExtractIdx(N->getOperand(0));
14384 
14385   // Compute the stride from the next operand.
14386   int Stride = GetExtractIdx(N->getOperand(1)) - Offset;
14387   SDValue ExtractedFromVec = N->getOperand(0).getOperand(0);
14388 
14389   // Proceed only if the stride and the types can be matched to a truncate.
14390   if ((Stride == 1 || !isPowerOf2_32(Stride)) ||
14391       (ExtractedFromVec.getValueType().getVectorNumElements() !=
14392        Stride * NumElems) ||
14393       (VT.getScalarSizeInBits() * Stride > 64))
14394     return SDValue();
14395 
14396   // Check remaining operands are consistent with the computed stride.
14397   for (unsigned i = 1; i != NumElems; ++i) {
14398     SDValue Op = N->getOperand(i);
14399 
14400     if ((Op.getOperand(0) != ExtractedFromVec) ||
14401         (GetExtractIdx(Op) != Stride * i + Offset))
14402       return SDValue();
14403   }
14404 
14405   SDValue Res = ExtractedFromVec;
14406   EVT TruncVT =
14407       VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
14408   if (Offset) {
14409     // If the first index is non-zero, need to shuffle elements of interest to
14410     // lower parts of the vector's elements the truncate will act upon.
14411     // TODO: Generalize to compute the permute-shuffle that will prepare any
14412     // element permutation for the truncate, and let the target decide if
14413     // profitable.
14414     EVT ExtractedVT = ExtractedFromVec.getValueType();
14415     SmallVector<int, 64> Mask;
14416     for (unsigned i = 0; i != NumElems; ++i) {
14417       Mask.push_back(Offset + i * Stride);
14418       // Pad the elements that will be lost after the truncate with undefs.
14419       Mask.append(Stride - 1, -1);
14420     }
14421     if (!TLI.isShuffleMaskLegal(Mask, ExtractedVT) ||
14422         !TLI.isDesirableToCombineBuildVectorToShuffleTruncate(Mask, ExtractedVT,
14423                                                               TruncVT))
14424       return SDValue();
14425     Res = DAG.getVectorShuffle(ExtractedVT, SDLoc(N), Res,
14426                                DAG.getUNDEF(ExtractedVT), Mask);
14427   }
14428   // Construct the truncate.
14429   LLVMContext &Ctx = *DAG.getContext();
14430   EVT NewVT = VT.getVectorVT(
14431       Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems);
14432 
14433   Res = DAG.getBitcast(NewVT, Res);
14434   Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res);
14435   return DAG.getBitcast(VT, Res);
14436 }
14437 
14438 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
14439   EVT VT = N->getValueType(0);
14440 
14441   // A vector built entirely of undefs is undef.
14442   if (ISD::allOperandsUndef(N))
14443     return DAG.getUNDEF(VT);
14444 
14445   // Check if we can express BUILD VECTOR via subvector extract.
14446   if (!LegalTypes && (N->getNumOperands() > 1)) {
14447     SDValue Op0 = N->getOperand(0);
14448     auto checkElem = [&](SDValue Op) -> uint64_t {
14449       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
14450           (Op0.getOperand(0) == Op.getOperand(0)))
14451         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
14452           return CNode->getZExtValue();
14453       return -1;
14454     };
14455 
14456     int Offset = checkElem(Op0);
14457     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
14458       if (Offset + i != checkElem(N->getOperand(i))) {
14459         Offset = -1;
14460         break;
14461       }
14462     }
14463 
14464     if ((Offset == 0) &&
14465         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
14466       return Op0.getOperand(0);
14467     if ((Offset != -1) &&
14468         ((Offset % N->getValueType(0).getVectorNumElements()) ==
14469          0)) // IDX must be multiple of output size.
14470       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
14471                          Op0.getOperand(0), Op0.getOperand(1));
14472   }
14473 
14474   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
14475     return V;
14476 
14477   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
14478     return V;
14479 
14480   if (TLI.isDesirableToCombineBuildVectorToTruncate())
14481     if (SDValue V = reduceBuildVecToTrunc(N))
14482       return V;
14483 
14484   if (SDValue V = reduceBuildVecToShuffle(N))
14485     return V;
14486 
14487   return SDValue();
14488 }
14489 
14490 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
14491   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14492   EVT OpVT = N->getOperand(0).getValueType();
14493 
14494   // If the operands are legal vectors, leave them alone.
14495   if (TLI.isTypeLegal(OpVT))
14496     return SDValue();
14497 
14498   SDLoc DL(N);
14499   EVT VT = N->getValueType(0);
14500   SmallVector<SDValue, 8> Ops;
14501 
14502   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
14503   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14504 
14505   // Keep track of what we encounter.
14506   bool AnyInteger = false;
14507   bool AnyFP = false;
14508   for (const SDValue &Op : N->ops()) {
14509     if (ISD::BITCAST == Op.getOpcode() &&
14510         !Op.getOperand(0).getValueType().isVector())
14511       Ops.push_back(Op.getOperand(0));
14512     else if (ISD::UNDEF == Op.getOpcode())
14513       Ops.push_back(ScalarUndef);
14514     else
14515       return SDValue();
14516 
14517     // Note whether we encounter an integer or floating point scalar.
14518     // If it's neither, bail out, it could be something weird like x86mmx.
14519     EVT LastOpVT = Ops.back().getValueType();
14520     if (LastOpVT.isFloatingPoint())
14521       AnyFP = true;
14522     else if (LastOpVT.isInteger())
14523       AnyInteger = true;
14524     else
14525       return SDValue();
14526   }
14527 
14528   // If any of the operands is a floating point scalar bitcast to a vector,
14529   // use floating point types throughout, and bitcast everything.
14530   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
14531   if (AnyFP) {
14532     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
14533     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14534     if (AnyInteger) {
14535       for (SDValue &Op : Ops) {
14536         if (Op.getValueType() == SVT)
14537           continue;
14538         if (Op.isUndef())
14539           Op = ScalarUndef;
14540         else
14541           Op = DAG.getBitcast(SVT, Op);
14542       }
14543     }
14544   }
14545 
14546   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
14547                                VT.getSizeInBits() / SVT.getSizeInBits());
14548   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
14549 }
14550 
14551 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
14552 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
14553 // most two distinct vectors the same size as the result, attempt to turn this
14554 // into a legal shuffle.
14555 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
14556   EVT VT = N->getValueType(0);
14557   EVT OpVT = N->getOperand(0).getValueType();
14558   int NumElts = VT.getVectorNumElements();
14559   int NumOpElts = OpVT.getVectorNumElements();
14560 
14561   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
14562   SmallVector<int, 8> Mask;
14563 
14564   for (SDValue Op : N->ops()) {
14565     // Peek through any bitcast.
14566     while (Op.getOpcode() == ISD::BITCAST)
14567       Op = Op.getOperand(0);
14568 
14569     // UNDEF nodes convert to UNDEF shuffle mask values.
14570     if (Op.isUndef()) {
14571       Mask.append((unsigned)NumOpElts, -1);
14572       continue;
14573     }
14574 
14575     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14576       return SDValue();
14577 
14578     // What vector are we extracting the subvector from and at what index?
14579     SDValue ExtVec = Op.getOperand(0);
14580 
14581     // We want the EVT of the original extraction to correctly scale the
14582     // extraction index.
14583     EVT ExtVT = ExtVec.getValueType();
14584 
14585     // Peek through any bitcast.
14586     while (ExtVec.getOpcode() == ISD::BITCAST)
14587       ExtVec = ExtVec.getOperand(0);
14588 
14589     // UNDEF nodes convert to UNDEF shuffle mask values.
14590     if (ExtVec.isUndef()) {
14591       Mask.append((unsigned)NumOpElts, -1);
14592       continue;
14593     }
14594 
14595     if (!isa<ConstantSDNode>(Op.getOperand(1)))
14596       return SDValue();
14597     int ExtIdx = Op.getConstantOperandVal(1);
14598 
14599     // Ensure that we are extracting a subvector from a vector the same
14600     // size as the result.
14601     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
14602       return SDValue();
14603 
14604     // Scale the subvector index to account for any bitcast.
14605     int NumExtElts = ExtVT.getVectorNumElements();
14606     if (0 == (NumExtElts % NumElts))
14607       ExtIdx /= (NumExtElts / NumElts);
14608     else if (0 == (NumElts % NumExtElts))
14609       ExtIdx *= (NumElts / NumExtElts);
14610     else
14611       return SDValue();
14612 
14613     // At most we can reference 2 inputs in the final shuffle.
14614     if (SV0.isUndef() || SV0 == ExtVec) {
14615       SV0 = ExtVec;
14616       for (int i = 0; i != NumOpElts; ++i)
14617         Mask.push_back(i + ExtIdx);
14618     } else if (SV1.isUndef() || SV1 == ExtVec) {
14619       SV1 = ExtVec;
14620       for (int i = 0; i != NumOpElts; ++i)
14621         Mask.push_back(i + ExtIdx + NumElts);
14622     } else {
14623       return SDValue();
14624     }
14625   }
14626 
14627   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
14628     return SDValue();
14629 
14630   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
14631                               DAG.getBitcast(VT, SV1), Mask);
14632 }
14633 
14634 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
14635   // If we only have one input vector, we don't need to do any concatenation.
14636   if (N->getNumOperands() == 1)
14637     return N->getOperand(0);
14638 
14639   // Check if all of the operands are undefs.
14640   EVT VT = N->getValueType(0);
14641   if (ISD::allOperandsUndef(N))
14642     return DAG.getUNDEF(VT);
14643 
14644   // Optimize concat_vectors where all but the first of the vectors are undef.
14645   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
14646         return Op.isUndef();
14647       })) {
14648     SDValue In = N->getOperand(0);
14649     assert(In.getValueType().isVector() && "Must concat vectors");
14650 
14651     // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
14652     if (In->getOpcode() == ISD::BITCAST &&
14653         !In->getOperand(0)->getValueType(0).isVector()) {
14654       SDValue Scalar = In->getOperand(0);
14655 
14656       // If the bitcast type isn't legal, it might be a trunc of a legal type;
14657       // look through the trunc so we can still do the transform:
14658       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
14659       if (Scalar->getOpcode() == ISD::TRUNCATE &&
14660           !TLI.isTypeLegal(Scalar.getValueType()) &&
14661           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
14662         Scalar = Scalar->getOperand(0);
14663 
14664       EVT SclTy = Scalar->getValueType(0);
14665 
14666       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
14667         return SDValue();
14668 
14669       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
14670       if (VNTNumElms < 2)
14671         return SDValue();
14672 
14673       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
14674       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
14675         return SDValue();
14676 
14677       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
14678       return DAG.getBitcast(VT, Res);
14679     }
14680   }
14681 
14682   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
14683   // We have already tested above for an UNDEF only concatenation.
14684   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
14685   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
14686   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
14687     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
14688   };
14689   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
14690     SmallVector<SDValue, 8> Opnds;
14691     EVT SVT = VT.getScalarType();
14692 
14693     EVT MinVT = SVT;
14694     if (!SVT.isFloatingPoint()) {
14695       // If BUILD_VECTOR are from built from integer, they may have different
14696       // operand types. Get the smallest type and truncate all operands to it.
14697       bool FoundMinVT = false;
14698       for (const SDValue &Op : N->ops())
14699         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
14700           EVT OpSVT = Op.getOperand(0)->getValueType(0);
14701           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
14702           FoundMinVT = true;
14703         }
14704       assert(FoundMinVT && "Concat vector type mismatch");
14705     }
14706 
14707     for (const SDValue &Op : N->ops()) {
14708       EVT OpVT = Op.getValueType();
14709       unsigned NumElts = OpVT.getVectorNumElements();
14710 
14711       if (ISD::UNDEF == Op.getOpcode())
14712         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
14713 
14714       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
14715         if (SVT.isFloatingPoint()) {
14716           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
14717           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
14718         } else {
14719           for (unsigned i = 0; i != NumElts; ++i)
14720             Opnds.push_back(
14721                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
14722         }
14723       }
14724     }
14725 
14726     assert(VT.getVectorNumElements() == Opnds.size() &&
14727            "Concat vector type mismatch");
14728     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
14729   }
14730 
14731   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
14732   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
14733     return V;
14734 
14735   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
14736   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
14737     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
14738       return V;
14739 
14740   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
14741   // nodes often generate nop CONCAT_VECTOR nodes.
14742   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
14743   // place the incoming vectors at the exact same location.
14744   SDValue SingleSource = SDValue();
14745   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
14746 
14747   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
14748     SDValue Op = N->getOperand(i);
14749 
14750     if (Op.isUndef())
14751       continue;
14752 
14753     // Check if this is the identity extract:
14754     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14755       return SDValue();
14756 
14757     // Find the single incoming vector for the extract_subvector.
14758     if (SingleSource.getNode()) {
14759       if (Op.getOperand(0) != SingleSource)
14760         return SDValue();
14761     } else {
14762       SingleSource = Op.getOperand(0);
14763 
14764       // Check the source type is the same as the type of the result.
14765       // If not, this concat may extend the vector, so we can not
14766       // optimize it away.
14767       if (SingleSource.getValueType() != N->getValueType(0))
14768         return SDValue();
14769     }
14770 
14771     unsigned IdentityIndex = i * PartNumElem;
14772     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
14773     // The extract index must be constant.
14774     if (!CS)
14775       return SDValue();
14776 
14777     // Check that we are reading from the identity index.
14778     if (CS->getZExtValue() != IdentityIndex)
14779       return SDValue();
14780   }
14781 
14782   if (SingleSource.getNode())
14783     return SingleSource;
14784 
14785   return SDValue();
14786 }
14787 
/// If we are extracting a subvector produced by a wide binary operator with
/// at least one operand that was the result of a vector concatenation, then
/// try to use the narrow vector operands directly to avoid the concatenation
/// and extraction.
14792 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
14793   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
14794   // some of these bailouts with other transforms.
14795 
14796   // The extract index must be a constant, so we can map it to a concat operand.
14797   auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
14798   if (!ExtractIndex)
14799     return SDValue();
14800 
14801   // Only handle the case where we are doubling and then halving. A larger ratio
14802   // may require more than two narrow binops to replace the wide binop.
14803   EVT VT = Extract->getValueType(0);
14804   unsigned NumElems = VT.getVectorNumElements();
14805   assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
14806          "Extract index is not a multiple of the vector length.");
14807   if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
14808     return SDValue();
14809 
14810   // We are looking for an optionally bitcasted wide vector binary operator
14811   // feeding an extract subvector.
14812   SDValue BinOp = Extract->getOperand(0);
14813   if (BinOp.getOpcode() == ISD::BITCAST)
14814     BinOp = BinOp.getOperand(0);
14815 
14816   // TODO: The motivating case for this transform is an x86 AVX1 target. That
14817   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
14818   // flavors, but no other 256-bit integer support. This could be extended to
14819   // handle any binop, but that may require fixing/adding other folds to avoid
14820   // codegen regressions.
14821   unsigned BOpcode = BinOp.getOpcode();
14822   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
14823     return SDValue();
14824 
14825   // The binop must be a vector type, so we can chop it in half.
14826   EVT WideBVT = BinOp.getValueType();
14827   if (!WideBVT.isVector())
14828     return SDValue();
14829 
14830   // Bail out if the target does not support a narrower version of the binop.
14831   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
14832                                    WideBVT.getVectorNumElements() / 2);
14833   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14834   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
14835     return SDValue();
14836 
14837   // Peek through bitcasts of the binary operator operands if needed.
14838   SDValue LHS = BinOp.getOperand(0);
14839   if (LHS.getOpcode() == ISD::BITCAST)
14840     LHS = LHS.getOperand(0);
14841 
14842   SDValue RHS = BinOp.getOperand(1);
14843   if (RHS.getOpcode() == ISD::BITCAST)
14844     RHS = RHS.getOperand(0);
14845 
14846   // We need at least one concatenation operation of a binop operand to make
14847   // this transform worthwhile. The concat must double the input vector sizes.
14848   // TODO: Should we also handle INSERT_SUBVECTOR patterns?
14849   bool ConcatL =
14850       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
14851   bool ConcatR =
14852       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
14853   if (!ConcatL && !ConcatR)
14854     return SDValue();
14855 
14856   // If one of the binop operands was not the result of a concat, we must
14857   // extract a half-sized operand for our new narrow binop. We can't just reuse
14858   // the original extract index operand because we may have bitcasted.
14859   unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
14860   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
14861   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
14862   SDLoc DL(Extract);
14863 
14864   // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
14865   // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
14866   // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
14867   SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
14868                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
14869                                     BinOp.getOperand(0),
14870                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
14871 
14872   SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
14873                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
14874                                     BinOp.getOperand(1),
14875                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
14876 
14877   SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
14878   return DAG.getBitcast(VT, NarrowBinOp);
14879 }
14880 
14881 /// If we are extracting a subvector from a wide vector load, convert to a
14882 /// narrow load to eliminate the extraction:
14883 /// (extract_subvector (load wide vector)) --> (load narrow vector)
14884 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
14885   // TODO: Add support for big-endian. The offset calculation must be adjusted.
14886   if (DAG.getDataLayout().isBigEndian())
14887     return SDValue();
14888 
14889   // TODO: The one-use check is overly conservative. Check the cost of the
14890   // extract instead or remove that condition entirely.
14891   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
14892   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
14893   if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
14894       !ExtIdx)
14895     return SDValue();
14896 
14897   // The narrow load will be offset from the base address of the old load if
14898   // we are extracting from something besides index 0 (little-endian).
14899   EVT VT = Extract->getValueType(0);
14900   SDLoc DL(Extract);
14901   SDValue BaseAddr = Ld->getOperand(1);
14902   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
14903 
14904   // TODO: Use "BaseIndexOffset" to make this more effective.
14905   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
14906   MachineFunction &MF = DAG.getMachineFunction();
14907   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
14908                                                    VT.getStoreSize());
14909   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
14910   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
14911   return NewLd;
14912 }
14913 
14914 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
14915   EVT NVT = N->getValueType(0);
14916   SDValue V = N->getOperand(0);
14917 
14918   // Extract from UNDEF is UNDEF.
14919   if (V.isUndef())
14920     return DAG.getUNDEF(NVT);
14921 
14922   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
14923     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
14924       return NarrowLoad;
14925 
14926   // Combine:
14927   //    (extract_subvec (concat V1, V2, ...), i)
14928   // Into:
14929   //    Vi if possible
14930   // Only operand 0 is checked as 'concat' assumes all inputs of the same
14931   // type.
14932   if (V->getOpcode() == ISD::CONCAT_VECTORS &&
14933       isa<ConstantSDNode>(N->getOperand(1)) &&
14934       V->getOperand(0).getValueType() == NVT) {
14935     unsigned Idx = N->getConstantOperandVal(1);
14936     unsigned NumElems = NVT.getVectorNumElements();
14937     assert((Idx % NumElems) == 0 &&
14938            "IDX in concat is not a multiple of the result vector length.");
14939     return V->getOperand(Idx / NumElems);
14940   }
14941 
14942   // Skip bitcasting
14943   if (V->getOpcode() == ISD::BITCAST)
14944     V = V.getOperand(0);
14945 
14946   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
14947     // Handle only simple case where vector being inserted and vector
14948     // being extracted are of same size.
14949     EVT SmallVT = V->getOperand(1).getValueType();
14950     if (!NVT.bitsEq(SmallVT))
14951       return SDValue();
14952 
14953     // Only handle cases where both indexes are constants.
14954     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
14955     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
14956 
14957     if (InsIdx && ExtIdx) {
14958       // Combine:
14959       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
14960       // Into:
14961       //    indices are equal or bit offsets are equal => V1
14962       //    otherwise => (extract_subvec V1, ExtIdx)
14963       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
14964           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
14965         return DAG.getBitcast(NVT, V->getOperand(1));
14966       return DAG.getNode(
14967           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
14968           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
14969           N->getOperand(1));
14970     }
14971   }
14972 
14973   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
14974     return NarrowBOp;
14975 
14976   return SDValue();
14977 }
14978 
14979 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
14980                                                  SDValue V, SelectionDAG &DAG) {
14981   SDLoc DL(V);
14982   EVT VT = V.getValueType();
14983 
14984   switch (V.getOpcode()) {
14985   default:
14986     return V;
14987 
14988   case ISD::CONCAT_VECTORS: {
14989     EVT OpVT = V->getOperand(0).getValueType();
14990     int OpSize = OpVT.getVectorNumElements();
14991     SmallBitVector OpUsedElements(OpSize, false);
14992     bool FoundSimplification = false;
14993     SmallVector<SDValue, 4> NewOps;
14994     NewOps.reserve(V->getNumOperands());
14995     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
14996       SDValue Op = V->getOperand(i);
14997       bool OpUsed = false;
14998       for (int j = 0; j < OpSize; ++j)
14999         if (UsedElements[i * OpSize + j]) {
15000           OpUsedElements[j] = true;
15001           OpUsed = true;
15002         }
15003       NewOps.push_back(
15004           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
15005                  : DAG.getUNDEF(OpVT));
15006       FoundSimplification |= Op == NewOps.back();
15007       OpUsedElements.reset();
15008     }
15009     if (FoundSimplification)
15010       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
15011     return V;
15012   }
15013 
15014   case ISD::INSERT_SUBVECTOR: {
15015     SDValue BaseV = V->getOperand(0);
15016     SDValue SubV = V->getOperand(1);
15017     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
15018     if (!IdxN)
15019       return V;
15020 
15021     int SubSize = SubV.getValueType().getVectorNumElements();
15022     int Idx = IdxN->getZExtValue();
15023     bool SubVectorUsed = false;
15024     SmallBitVector SubUsedElements(SubSize, false);
15025     for (int i = 0; i < SubSize; ++i)
15026       if (UsedElements[i + Idx]) {
15027         SubVectorUsed = true;
15028         SubUsedElements[i] = true;
15029         UsedElements[i + Idx] = false;
15030       }
15031 
15032     // Now recurse on both the base and sub vectors.
15033     SDValue SimplifiedSubV =
15034         SubVectorUsed
15035             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
15036             : DAG.getUNDEF(SubV.getValueType());
15037     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
15038     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
15039       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
15040                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
15041     return V;
15042   }
15043   }
15044 }
15045 
15046 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
15047                                        SDValue N1, SelectionDAG &DAG) {
15048   EVT VT = SVN->getValueType(0);
15049   int NumElts = VT.getVectorNumElements();
15050   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
15051   for (int M : SVN->getMask())
15052     if (M >= 0 && M < NumElts)
15053       N0UsedElements[M] = true;
15054     else if (M >= NumElts)
15055       N1UsedElements[M - NumElts] = true;
15056 
15057   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
15058   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
15059   if (S0 == N0 && S1 == N1)
15060     return SDValue();
15061 
15062   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
15063 }
15064 
15065 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
15066 // or turn a shuffle of a single concat into simpler shuffle then concat.
15067 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
15068   EVT VT = N->getValueType(0);
15069   unsigned NumElts = VT.getVectorNumElements();
15070 
15071   SDValue N0 = N->getOperand(0);
15072   SDValue N1 = N->getOperand(1);
15073   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15074 
15075   SmallVector<SDValue, 4> Ops;
15076   EVT ConcatVT = N0.getOperand(0).getValueType();
15077   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
15078   unsigned NumConcats = NumElts / NumElemsPerConcat;
15079 
15080   // Special case: shuffle(concat(A,B)) can be more efficiently represented
15081   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
15082   // half vector elements.
15083   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
15084       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
15085                   SVN->getMask().end(), [](int i) { return i == -1; })) {
15086     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
15087                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
15088     N1 = DAG.getUNDEF(ConcatVT);
15089     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
15090   }
15091 
15092   // Look at every vector that's inserted. We're looking for exact
15093   // subvector-sized copies from a concatenated vector
15094   for (unsigned I = 0; I != NumConcats; ++I) {
15095     // Make sure we're dealing with a copy.
15096     unsigned Begin = I * NumElemsPerConcat;
15097     bool AllUndef = true, NoUndef = true;
15098     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
15099       if (SVN->getMaskElt(J) >= 0)
15100         AllUndef = false;
15101       else
15102         NoUndef = false;
15103     }
15104 
15105     if (NoUndef) {
15106       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
15107         return SDValue();
15108 
15109       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
15110         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
15111           return SDValue();
15112 
15113       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
15114       if (FirstElt < N0.getNumOperands())
15115         Ops.push_back(N0.getOperand(FirstElt));
15116       else
15117         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
15118 
15119     } else if (AllUndef) {
15120       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
15121     } else { // Mixed with general masks and undefs, can't do optimization.
15122       return SDValue();
15123     }
15124   }
15125 
15126   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
15127 }
15128 
15129 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
15130 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
15131 //
15132 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
15133 // a simplification in some sense, but it isn't appropriate in general: some
15134 // BUILD_VECTORs are substantially cheaper than others. The general case
15135 // of a BUILD_VECTOR requires inserting each element individually (or
15136 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
15137 // all constants is a single constant pool load.  A BUILD_VECTOR where each
15138 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
15139 // are undef lowers to a small number of element insertions.
15140 //
15141 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
15142 // We don't fold shuffles where one side is a non-zero constant, and we don't
15143 // fold shuffles if the resulting BUILD_VECTOR would have duplicate
15144 // non-constant operands. This seems to work out reasonably well in practice.
15145 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
15146                                        SelectionDAG &DAG,
15147                                        const TargetLowering &TLI) {
15148   EVT VT = SVN->getValueType(0);
15149   unsigned NumElts = VT.getVectorNumElements();
15150   SDValue N0 = SVN->getOperand(0);
15151   SDValue N1 = SVN->getOperand(1);
15152 
15153   if (!N0->hasOneUse() || !N1->hasOneUse())
15154     return SDValue();
15155   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
15156   // discussed above.
15157   if (!N1.isUndef()) {
15158     bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
15159     bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
15160     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
15161       return SDValue();
15162     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
15163       return SDValue();
15164   }
15165 
15166   SmallVector<SDValue, 8> Ops;
15167   SmallSet<SDValue, 16> DuplicateOps;
15168   for (int M : SVN->getMask()) {
15169     SDValue Op = DAG.getUNDEF(VT.getScalarType());
15170     if (M >= 0) {
15171       int Idx = M < (int)NumElts ? M : M - NumElts;
15172       SDValue &S = (M < (int)NumElts ? N0 : N1);
15173       if (S.getOpcode() == ISD::BUILD_VECTOR) {
15174         Op = S.getOperand(Idx);
15175       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15176         if (Idx == 0)
15177           Op = S.getOperand(0);
15178       } else {
15179         // Operand can't be combined - bail out.
15180         return SDValue();
15181       }
15182     }
15183 
15184     // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
15185     // fine, but it's likely to generate low-quality code if the target can't
15186     // reconstruct an appropriate shuffle.
15187     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
15188       if (!DuplicateOps.insert(Op).second)
15189         return SDValue();
15190 
15191     Ops.push_back(Op);
15192   }
15193   // BUILD_VECTOR requires all inputs to be of the same type, find the
15194   // maximum type and extend them all.
15195   EVT SVT = VT.getScalarType();
15196   if (SVT.isInteger())
15197     for (SDValue &Op : Ops)
15198       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
15199   if (SVT != VT.getScalarType())
15200     for (SDValue &Op : Ops)
15201       Op = TLI.isZExtFree(Op.getValueType(), SVT)
15202                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
15203                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
15204   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
15205 }
15206 
15207 // Match shuffles that can be converted to any_vector_extend_in_reg.
15208 // This is often generated during legalization.
15209 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
15210 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
15211 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
15212                                             SelectionDAG &DAG,
15213                                             const TargetLowering &TLI,
15214                                             bool LegalOperations) {
15215   EVT VT = SVN->getValueType(0);
15216   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15217 
15218   // TODO Add support for big-endian when we have a test case.
15219   if (!VT.isInteger() || IsBigEndian)
15220     return SDValue();
15221 
15222   unsigned NumElts = VT.getVectorNumElements();
15223   unsigned EltSizeInBits = VT.getScalarSizeInBits();
15224   ArrayRef<int> Mask = SVN->getMask();
15225   SDValue N0 = SVN->getOperand(0);
15226 
15227   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
15228   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
15229     for (unsigned i = 0; i != NumElts; ++i) {
15230       if (Mask[i] < 0)
15231         continue;
15232       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
15233         continue;
15234       return false;
15235     }
15236     return true;
15237   };
15238 
15239   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
15240   // power-of-2 extensions as they are the most likely.
15241   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
15242     if (!isAnyExtend(Scale))
15243       continue;
15244 
15245     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
15246     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
15247     if (!LegalOperations ||
15248         TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
15249       return DAG.getBitcast(VT,
15250                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
15251   }
15252 
15253   return SDValue();
15254 }
15255 
15256 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
15257 // each source element of a large type into the lowest elements of a smaller
15258 // destination type. This is often generated during legalization.
15259 // If the source node itself was a '*_extend_vector_inreg' node then we should
15260 // then be able to remove it.
15261 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
15262                                         SelectionDAG &DAG) {
15263   EVT VT = SVN->getValueType(0);
15264   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15265 
15266   // TODO Add support for big-endian when we have a test case.
15267   if (!VT.isInteger() || IsBigEndian)
15268     return SDValue();
15269 
15270   SDValue N0 = SVN->getOperand(0);
15271   while (N0.getOpcode() == ISD::BITCAST)
15272     N0 = N0.getOperand(0);
15273 
15274   unsigned Opcode = N0.getOpcode();
15275   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
15276       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
15277       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
15278     return SDValue();
15279 
15280   SDValue N00 = N0.getOperand(0);
15281   ArrayRef<int> Mask = SVN->getMask();
15282   unsigned NumElts = VT.getVectorNumElements();
15283   unsigned EltSizeInBits = VT.getScalarSizeInBits();
15284   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
15285   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
15286 
15287   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
15288     return SDValue();
15289   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
15290 
15291   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
15292   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
15293   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
15294   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
15295     for (unsigned i = 0; i != NumElts; ++i) {
15296       if (Mask[i] < 0)
15297         continue;
15298       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
15299         continue;
15300       return false;
15301     }
15302     return true;
15303   };
15304 
15305   // At the moment we just handle the case where we've truncated back to the
15306   // same size as before the extension.
15307   // TODO: handle more extension/truncation cases as cases arise.
15308   if (EltSizeInBits != ExtSrcSizeInBits)
15309     return SDValue();
15310 
15311   // We can remove *extend_vector_inreg only if the truncation happens at
15312   // the same scale as the extension.
15313   if (isTruncate(ExtScale))
15314     return DAG.getBitcast(VT, N00);
15315 
15316   return SDValue();
15317 }
15318 
15319 // Combine shuffles of splat-shuffles of the form:
15320 // shuffle (shuffle V, undef, splat-mask), undef, M
15321 // If splat-mask contains undef elements, we need to be careful about
15322 // introducing undef's in the folded mask which are not the result of composing
15323 // the masks of the shuffles.
15324 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
15325                                      ShuffleVectorSDNode *Splat,
15326                                      SelectionDAG &DAG) {
15327   ArrayRef<int> SplatMask = Splat->getMask();
15328   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
15329 
15330   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
15331   // every undef mask element in the splat-shuffle has a corresponding undef
15332   // element in the user-shuffle's mask or if the composition of mask elements
15333   // would result in undef.
15334   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
15335   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
15336   //   In this case it is not legal to simplify to the splat-shuffle because we
15337   //   may be exposing the users of the shuffle an undef element at index 1
15338   //   which was not there before the combine.
15339   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
15340   //   In this case the composition of masks yields SplatMask, so it's ok to
15341   //   simplify to the splat-shuffle.
15342   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
15343   //   In this case the composed mask includes all undef elements of SplatMask
15344   //   and in addition sets element zero to undef. It is safe to simplify to
15345   //   the splat-shuffle.
15346   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
15347                                        ArrayRef<int> SplatMask) {
15348     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
15349       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
15350           SplatMask[UserMask[i]] != -1)
15351         return false;
15352     return true;
15353   };
15354   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
15355     return SDValue(Splat, 0);
15356 
15357   // Create a new shuffle with a mask that is composed of the two shuffles'
15358   // masks.
15359   SmallVector<int, 32> NewMask;
15360   for (int Idx : UserMask)
15361     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
15362 
15363   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
15364                               Splat->getOperand(0), Splat->getOperand(1),
15365                               NewMask);
15366 }
15367 
/// Combine a VECTOR_SHUFFLE node.
///
/// The folds below are attempted in order; the value produced by the first
/// one that applies is returned. An empty SDValue is returned when no fold
/// applies.
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Indices into the second operand are redirected to the (identical)
      // first operand.
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    // Only build a new node if at least one mask element was rewritten.
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  // A shuffle of a single vector that is a splat can always be folded.
  if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
    if (N1->isUndef() && N0Shuf->isSplat())
      return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      // Base is the first non-undef operand of the BUILD_VECTOR, if any.
      SDValue Base;
      bool AllSame = true;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // There are various patterns used to build up a vector from smaller vectors,
  // subvectors, or elements. Scan chains of these and replace unused insertions
  // or components with undef.
  if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
    return S;

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  // Shuffle of one CONCAT_VECTORS (with undef rhs) or of two CONCAT_VECTORS
  // whose pieces have the same type: hand it to partitionShuffleOfConcats.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    // Peek through the bitcast only if there is one user.
    SDValue BC0 = N0;
    while (BC0.getOpcode() == ISD::BITCAST) {
      if (!BC0.hasOneUse())
        break;
      BC0 = BC0.getOperand(0);
    }

    // Expand every mask element M into Scale consecutive elements
    // Scale*M .. Scale*M+Scale-1 (undef elements expand to Scale undefs).
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      // Both scalar sizes must be whole multiples of the smaller scalar size
      // so that the two masks can be rescaled to a common element type.
      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {

        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    // SV0/SV1 are the (at most two) distinct non-undef source vectors that
    // the combined mask ends up referencing; they are discovered lazily while
    // walking the mask.
    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    // Any operand slot that was never assigned becomes UNDEF.
    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
  }

  return SDValue();
}
15707 
15708 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
15709   SDValue InVal = N->getOperand(0);
15710   EVT VT = N->getValueType(0);
15711 
15712   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
15713   // with a VECTOR_SHUFFLE.
15714   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15715     SDValue InVec = InVal->getOperand(0);
15716     SDValue EltNo = InVal->getOperand(1);
15717 
15718     // FIXME: We could support implicit truncation if the shuffle can be
15719     // scaled to a smaller vector scalar type.
15720     ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
15721     if (C0 && VT == InVec.getValueType() &&
15722         VT.getScalarType() == InVal.getValueType()) {
15723       SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
15724       int Elt = C0->getZExtValue();
15725       NewMask[0] = Elt;
15726 
15727       if (TLI.isShuffleMaskLegal(NewMask, VT))
15728         return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
15729                                     NewMask);
15730     }
15731   }
15732 
15733   return SDValue();
15734 }
15735 
15736 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
15737   EVT VT = N->getValueType(0);
15738   SDValue N0 = N->getOperand(0);
15739   SDValue N1 = N->getOperand(1);
15740   SDValue N2 = N->getOperand(2);
15741 
15742   // If inserting an UNDEF, just return the original vector.
15743   if (N1.isUndef())
15744     return N0;
15745 
15746   // If this is an insert of an extracted vector into an undef vector, we can
15747   // just use the input to the extract.
15748   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15749       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
15750     return N1.getOperand(0);
15751 
15752   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
15753   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
15754   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
15755   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
15756       N0.getOperand(1).getValueType() == N1.getValueType() &&
15757       N0.getOperand(2) == N2)
15758     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
15759                        N1, N2);
15760 
15761   if (!isa<ConstantSDNode>(N2))
15762     return SDValue();
15763 
15764   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
15765 
15766   // Canonicalize insert_subvector dag nodes.
15767   // Example:
15768   // (insert_subvector (insert_subvector A, Idx0), Idx1)
15769   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
15770   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
15771       N1.getValueType() == N0.getOperand(1).getValueType() &&
15772       isa<ConstantSDNode>(N0.getOperand(2))) {
15773     unsigned OtherIdx = N0.getConstantOperandVal(2);
15774     if (InsIdx < OtherIdx) {
15775       // Swap nodes.
15776       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
15777                                   N0.getOperand(0), N1, N2);
15778       AddToWorklist(NewOp.getNode());
15779       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
15780                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
15781     }
15782   }
15783 
15784   // If the input vector is a concatenation, and the insert replaces
15785   // one of the pieces, we can optimize into a single concat_vectors.
15786   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
15787       N0.getOperand(0).getValueType() == N1.getValueType()) {
15788     unsigned Factor = N1.getValueType().getVectorNumElements();
15789 
15790     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
15791     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
15792 
15793     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
15794   }
15795 
15796   return SDValue();
15797 }
15798 
15799 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
15800   SDValue N0 = N->getOperand(0);
15801 
15802   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
15803   if (N0->getOpcode() == ISD::FP16_TO_FP)
15804     return N0->getOperand(0);
15805 
15806   return SDValue();
15807 }
15808 
15809 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
15810   SDValue N0 = N->getOperand(0);
15811 
15812   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
15813   if (N0->getOpcode() == ISD::AND) {
15814     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
15815     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
15816       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
15817                          N0.getOperand(0));
15818     }
15819   }
15820 
15821   return SDValue();
15822 }
15823 
15824 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
15825 /// with the destination vector and a zero vector.
15826 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
15827 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
15828 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
15829   EVT VT = N->getValueType(0);
15830   SDValue LHS = N->getOperand(0);
15831   SDValue RHS = N->getOperand(1);
15832   SDLoc DL(N);
15833 
15834   // Make sure we're not running after operation legalization where it
15835   // may have custom lowered the vector shuffles.
15836   if (LegalOperations)
15837     return SDValue();
15838 
15839   if (N->getOpcode() != ISD::AND)
15840     return SDValue();
15841 
15842   if (RHS.getOpcode() == ISD::BITCAST)
15843     RHS = RHS.getOperand(0);
15844 
15845   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
15846     return SDValue();
15847 
15848   EVT RVT = RHS.getValueType();
15849   unsigned NumElts = RHS.getNumOperands();
15850 
15851   // Attempt to create a valid clear mask, splitting the mask into
15852   // sub elements and checking to see if each is
15853   // all zeros or all ones - suitable for shuffle masking.
15854   auto BuildClearMask = [&](int Split) {
15855     int NumSubElts = NumElts * Split;
15856     int NumSubBits = RVT.getScalarSizeInBits() / Split;
15857 
15858     SmallVector<int, 8> Indices;
15859     for (int i = 0; i != NumSubElts; ++i) {
15860       int EltIdx = i / Split;
15861       int SubIdx = i % Split;
15862       SDValue Elt = RHS.getOperand(EltIdx);
15863       if (Elt.isUndef()) {
15864         Indices.push_back(-1);
15865         continue;
15866       }
15867 
15868       APInt Bits;
15869       if (isa<ConstantSDNode>(Elt))
15870         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
15871       else if (isa<ConstantFPSDNode>(Elt))
15872         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
15873       else
15874         return SDValue();
15875 
15876       // Extract the sub element from the constant bit mask.
15877       if (DAG.getDataLayout().isBigEndian()) {
15878         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
15879       } else {
15880         Bits.lshrInPlace(SubIdx * NumSubBits);
15881       }
15882 
15883       if (Split > 1)
15884         Bits = Bits.trunc(NumSubBits);
15885 
15886       if (Bits.isAllOnesValue())
15887         Indices.push_back(i);
15888       else if (Bits == 0)
15889         Indices.push_back(i + NumSubElts);
15890       else
15891         return SDValue();
15892     }
15893 
15894     // Let's see if the target supports this vector_shuffle.
15895     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
15896     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
15897     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
15898       return SDValue();
15899 
15900     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
15901     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
15902                                                    DAG.getBitcast(ClearVT, LHS),
15903                                                    Zero, Indices));
15904   };
15905 
15906   // Determine maximum split level (byte level masking).
15907   int MaxSplit = 1;
15908   if (RVT.getScalarSizeInBits() % 8 == 0)
15909     MaxSplit = RVT.getScalarSizeInBits() / 8;
15910 
15911   for (int Split = 1; Split <= MaxSplit; ++Split)
15912     if (RVT.getScalarSizeInBits() % Split == 0)
15913       if (SDValue S = BuildClearMask(Split))
15914         return S;
15915 
15916   return SDValue();
15917 }
15918 
15919 /// Visit a binary vector operation, like ADD.
15920 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
15921   assert(N->getValueType(0).isVector() &&
15922          "SimplifyVBinOp only works on vectors!");
15923 
15924   SDValue LHS = N->getOperand(0);
15925   SDValue RHS = N->getOperand(1);
15926   SDValue Ops[] = {LHS, RHS};
15927 
15928   // See if we can constant fold the vector operation.
15929   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
15930           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
15931     return Fold;
15932 
15933   // Try to convert a constant mask AND into a shuffle clear mask.
15934   if (SDValue Shuffle = XformToShuffleWithZero(N))
15935     return Shuffle;
15936 
15937   // Type legalization might introduce new shuffles in the DAG.
15938   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
15939   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
15940   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
15941       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
15942       LHS.getOperand(1).isUndef() &&
15943       RHS.getOperand(1).isUndef()) {
15944     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
15945     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
15946 
15947     if (SVN0->getMask().equals(SVN1->getMask())) {
15948       EVT VT = N->getValueType(0);
15949       SDValue UndefVector = LHS.getOperand(1);
15950       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
15951                                      LHS.getOperand(0), RHS.getOperand(0),
15952                                      N->getFlags());
15953       AddUsersToWorklist(N);
15954       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
15955                                   SVN0->getMask());
15956     }
15957   }
15958 
15959   return SDValue();
15960 }
15961 
15962 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
15963                                     SDValue N2) {
15964   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
15965 
15966   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
15967                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
15968 
15969   // If we got a simplified select_cc node back from SimplifySelectCC, then
15970   // break it down into a new SETCC node, and a new SELECT node, and then return
15971   // the SELECT node, since we were called with a SELECT node.
15972   if (SCC.getNode()) {
15973     // Check to see if we got a select_cc back (to turn into setcc/select).
15974     // Otherwise, just return whatever node we got back, like fabs.
15975     if (SCC.getOpcode() == ISD::SELECT_CC) {
15976       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
15977                                   N0.getValueType(),
15978                                   SCC.getOperand(0), SCC.getOperand(1),
15979                                   SCC.getOperand(4));
15980       AddToWorklist(SETCC.getNode());
15981       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
15982                            SCC.getOperand(2), SCC.getOperand(3));
15983     }
15984 
15985     return SCC;
15986   }
15987   return SDValue();
15988 }
15989 
15990 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
15991 /// being selected between, see if we can simplify the select.  Callers of this
15992 /// should assume that TheSelect is deleted if this returns true.  As such, they
15993 /// should return the appropriate thing (e.g. the node) back to the top-level of
15994 /// the DAG combiner loop to avoid it being looked at.
15995 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
15996                                     SDValue RHS) {
15997 
15998   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
15999   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
16000   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
16001     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
16002       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
16003       SDValue Sqrt = RHS;
16004       ISD::CondCode CC;
16005       SDValue CmpLHS;
16006       const ConstantFPSDNode *Zero = nullptr;
16007 
16008       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
16009         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
16010         CmpLHS = TheSelect->getOperand(0);
16011         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
16012       } else {
16013         // SELECT or VSELECT
16014         SDValue Cmp = TheSelect->getOperand(0);
16015         if (Cmp.getOpcode() == ISD::SETCC) {
16016           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
16017           CmpLHS = Cmp.getOperand(0);
16018           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
16019         }
16020       }
16021       if (Zero && Zero->isZero() &&
16022           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
16023           CC == ISD::SETULT || CC == ISD::SETLT)) {
16024         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
16025         CombineTo(TheSelect, Sqrt);
16026         return true;
16027       }
16028     }
16029   }
16030   // Cannot simplify select with vector condition
16031   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
16032 
16033   // If this is a select from two identical things, try to pull the operation
16034   // through the select.
16035   if (LHS.getOpcode() != RHS.getOpcode() ||
16036       !LHS.hasOneUse() || !RHS.hasOneUse())
16037     return false;
16038 
16039   // If this is a load and the token chain is identical, replace the select
16040   // of two loads with a load through a select of the address to load from.
16041   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
16042   // constants have been dropped into the constant pool.
16043   if (LHS.getOpcode() == ISD::LOAD) {
16044     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
16045     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
16046 
16047     // Token chains must be identical.
16048     if (LHS.getOperand(0) != RHS.getOperand(0) ||
16049         // Do not let this transformation reduce the number of volatile loads.
16050         LLD->isVolatile() || RLD->isVolatile() ||
16051         // FIXME: If either is a pre/post inc/dec load,
16052         // we'd need to split out the address adjustment.
16053         LLD->isIndexed() || RLD->isIndexed() ||
16054         // If this is an EXTLOAD, the VT's must match.
16055         LLD->getMemoryVT() != RLD->getMemoryVT() ||
16056         // If this is an EXTLOAD, the kind of extension must match.
16057         (LLD->getExtensionType() != RLD->getExtensionType() &&
16058          // The only exception is if one of the extensions is anyext.
16059          LLD->getExtensionType() != ISD::EXTLOAD &&
16060          RLD->getExtensionType() != ISD::EXTLOAD) ||
16061         // FIXME: this discards src value information.  This is
16062         // over-conservative. It would be beneficial to be able to remember
16063         // both potential memory locations.  Since we are discarding
16064         // src value info, don't do the transformation if the memory
16065         // locations are not in the default address space.
16066         LLD->getPointerInfo().getAddrSpace() != 0 ||
16067         RLD->getPointerInfo().getAddrSpace() != 0 ||
16068         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
16069                                       LLD->getBasePtr().getValueType()))
16070       return false;
16071 
16072     // Check that the select condition doesn't reach either load.  If so,
16073     // folding this will induce a cycle into the DAG.  If not, this is safe to
16074     // xform, so create a select of the addresses.
16075     SDValue Addr;
16076     if (TheSelect->getOpcode() == ISD::SELECT) {
16077       SDNode *CondNode = TheSelect->getOperand(0).getNode();
16078       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
16079           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
16080         return false;
16081       // The loads must not depend on one another.
16082       if (LLD->isPredecessorOf(RLD) ||
16083           RLD->isPredecessorOf(LLD))
16084         return false;
16085       Addr = DAG.getSelect(SDLoc(TheSelect),
16086                            LLD->getBasePtr().getValueType(),
16087                            TheSelect->getOperand(0), LLD->getBasePtr(),
16088                            RLD->getBasePtr());
16089     } else {  // Otherwise SELECT_CC
16090       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
16091       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
16092 
16093       if ((LLD->hasAnyUseOfValue(1) &&
16094            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
16095           (RLD->hasAnyUseOfValue(1) &&
16096            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
16097         return false;
16098 
16099       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
16100                          LLD->getBasePtr().getValueType(),
16101                          TheSelect->getOperand(0),
16102                          TheSelect->getOperand(1),
16103                          LLD->getBasePtr(), RLD->getBasePtr(),
16104                          TheSelect->getOperand(4));
16105     }
16106 
16107     SDValue Load;
16108     // It is safe to replace the two loads if they have different alignments,
16109     // but the new load must be the minimum (most restrictive) alignment of the
16110     // inputs.
16111     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
16112     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
16113     if (!RLD->isInvariant())
16114       MMOFlags &= ~MachineMemOperand::MOInvariant;
16115     if (!RLD->isDereferenceable())
16116       MMOFlags &= ~MachineMemOperand::MODereferenceable;
16117     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
16118       // FIXME: Discards pointer and AA info.
16119       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
16120                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
16121                          MMOFlags);
16122     } else {
16123       // FIXME: Discards pointer and AA info.
16124       Load = DAG.getExtLoad(
16125           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
16126                                                   : LLD->getExtensionType(),
16127           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
16128           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
16129     }
16130 
16131     // Users of the select now use the result of the load.
16132     CombineTo(TheSelect, Load);
16133 
16134     // Users of the old loads now use the new load's chain.  We know the
16135     // old-load value is dead now.
16136     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
16137     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
16138     return true;
16139   }
16140 
16141   return false;
16142 }
16143 
16144 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
16145 /// bitwise 'and'.
16146 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
16147                                             SDValue N1, SDValue N2, SDValue N3,
16148                                             ISD::CondCode CC) {
16149   // If this is a select where the false operand is zero and the compare is a
16150   // check of the sign bit, see if we can perform the "gzip trick":
16151   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
16152   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
16153   EVT XType = N0.getValueType();
16154   EVT AType = N2.getValueType();
16155   if (!isNullConstant(N3) || !XType.bitsGE(AType))
16156     return SDValue();
16157 
16158   // If the comparison is testing for a positive value, we have to invert
16159   // the sign bit mask, so only do that transform if the target has a bitwise
16160   // 'and not' instruction (the invert is free).
16161   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
16162     // (X > -1) ? A : 0
16163     // (X >  0) ? X : 0 <-- This is canonical signed max.
16164     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
16165       return SDValue();
16166   } else if (CC == ISD::SETLT) {
16167     // (X <  0) ? A : 0
16168     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
16169     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
16170       return SDValue();
16171   } else {
16172     return SDValue();
16173   }
16174 
16175   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
16176   // constant.
16177   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
16178   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
16179   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
16180     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
16181     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
16182     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
16183     AddToWorklist(Shift.getNode());
16184 
16185     if (XType.bitsGT(AType)) {
16186       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16187       AddToWorklist(Shift.getNode());
16188     }
16189 
16190     if (CC == ISD::SETGT)
16191       Shift = DAG.getNOT(DL, Shift, AType);
16192 
16193     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16194   }
16195 
16196   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
16197   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
16198   AddToWorklist(Shift.getNode());
16199 
16200   if (XType.bitsGT(AType)) {
16201     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16202     AddToWorklist(Shift.getNode());
16203   }
16204 
16205   if (CC == ISD::SETGT)
16206     Shift = DAG.getNOT(DL, Shift, AType);
16207 
16208   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16209 }
16210 
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
///
/// The folds are tried in order: identical arms, constant condition, fabs,
/// FP-constant pair to a constant-pool load, sign-bit select to shift/and,
/// single-bit test to shl/sra/and, power-of-two select to (shifted) zext of a
/// setcc, integer abs, and redundant zero checks around ctlz/cttz.
///
/// \param DL debug location used for most of the newly created nodes.
/// \param N0 left-hand operand of the comparison.
/// \param N1 right-hand operand of the comparison.
/// \param N2 value selected when the comparison is true.
/// \param N3 value selected when the comparison is false.
/// \param CC the comparison predicate.
/// \param NotExtCompare if true, do not turn the select into a zero-extended
///        compare (the "select C, 1, 0" case).
/// \returns the simplified value, or an empty SDValue if no fold applied.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorklist(SCC.getNode());

  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    // fold select_cc true, x, y -> x
    // fold select_cc false, x, y -> y
    return !SCCC->isNullValue() ? N2 : N3;
  }

  // Check to see if we can simplify the select into an fabs node
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS, DL, VT, N3);
    }
  }

  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
  // in it.  This is a win when the constant is not otherwise available because
  // it replaces two constant pool loads with one.  We only do this if the FP
  // type is known to be legal, because if it isn't, then we are before legalize
  // types and we want the other legalization to happen first (e.g. to avoid
  // messing with soft float) and if the ConstantFP is not legal, because if
  // it is legal, we may not need to store the FP constant in a constant pool.
  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
      if (TLI.isTypeLegal(N2.getValueType()) &&
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
               TargetLowering::Legal &&
           !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
           !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
          // If both constants have multiple uses, then we won't need to do an
          // extra load, they are likely around in registers for other users.
          (TV->hasOneUse() || FV->hasOneUse())) {
        // Element 0 is the false value and element 1 the true value, so a
        // true condition selects the non-zero offset below.
        Constant *Elts[] = {
          const_cast<ConstantFP*>(FV->getConstantFPValue()),
          const_cast<ConstantFP*>(TV->getConstantFPValue())
        };
        Type *FPTy = Elts[0]->getType();
        const DataLayout &TD = DAG.getDataLayout();

        // Create a ConstantArray of the two constants.
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
        SDValue CPIdx =
            DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                TD.getPrefTypeAlignment(FPTy));
        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
        SDValue Zero = DAG.getIntPtrConstant(0, DL);
        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
        SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));

        SDValue Cond = DAG.getSetCC(DL,
                                    getSetCCResultType(N0.getValueType()),
                                    N0, N1, CC);
        AddToWorklist(Cond.getNode());
        SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
                                          Cond, One, Zero);
        AddToWorklist(CstOffset.getNode());
        // Address of the selected element: pool entry base + chosen offset.
        CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
                            CstOffset);
        AddToWorklist(CPIdx.getNode());
        return DAG.getLoad(
            TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
            MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
            Alignment);
      }
    }

  // Sign-bit selects with a zero false operand: try the shift + 'and' form.
  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
        DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                        getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is either
      // all-ones, or zero.
      SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                        getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  // Only valid when a true setcc result is exactly 1 for this operand type.
  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType()) ==
          TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    // Get a SetCC of the condition
    // NOTE: Don't create a SETCC if it's not legal on this target.
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
      SDValue Temp, SCC;
      // cast from setcc result type to select result type
      if (LegalTypes) {
        SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
                            N0, N1, CC);
        if (N2.getValueType().bitsLT(SCC.getValueType()))
          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
                                        N2.getValueType());
        else
          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                             N2.getValueType(), SCC);
      } else {
        // Types are not yet required to be legal, so an i1 setcc can be used
        // directly.
        SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                           N2.getValueType(), SCC);
      }

      AddToWorklist(SCC.getNode());
      AddToWorklist(Temp.getNode());

      // select C, 1, 0 is just the extended compare itself.
      if (N2C->isOne())
        return Temp;

      // shl setcc result by log2 n2c
      return DAG.getNode(
          ISD::SHL, DL, N2.getValueType(), Temp,
          DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
                          getShiftAmountTy(Temp.getValueType())));
    }
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C) {
    // SubC ends up as the left operand of the (sub ?, X) arm when that operand
    // is a constant; the negation pattern requires it to be zero.
    ConstantSDNode *SubC = nullptr;
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    EVT XType = N0.getValueType();
    if (SubC && SubC->isNullValue() && XType.isInteger()) {
      // NOTE: this local DL shadows the DL parameter; the abs expansion nodes
      // take their location from N0.
      SDLoc DL(N0);
      SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
                                  N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1, DL,
                                         getShiftAmountTy(N0.getValueType())));
      SDValue Add = DAG.getNode(ISD::ADD, DL,
                                XType, N0, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
    }
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  // No simplification applied.
  return SDValue();
}
16453 
16454 /// This is a stub for TargetLowering::SimplifySetCC.
16455 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
16456                                    ISD::CondCode Cond, const SDLoc &DL,
16457                                    bool foldBooleans) {
16458   TargetLowering::DAGCombinerInfo
16459     DagCombineInfo(DAG, Level, false, this);
16460   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
16461 }
16462 
16463 /// Given an ISD::SDIV node expressing a divide by constant, return
16464 /// a DAG expression to select that will generate the same value by multiplying
16465 /// by a magic number.
16466 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
16467 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
16468   // when optimising for minimum size, we don't want to expand a div to a mul
16469   // and a shift.
16470   if (DAG.getMachineFunction().getFunction()->optForMinSize())
16471     return SDValue();
16472 
16473   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16474   if (!C)
16475     return SDValue();
16476 
16477   // Avoid division by zero.
16478   if (C->isNullValue())
16479     return SDValue();
16480 
16481   std::vector<SDNode*> Built;
16482   SDValue S =
16483       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
16484 
16485   for (SDNode *N : Built)
16486     AddToWorklist(N);
16487   return S;
16488 }
16489 
16490 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
16491 /// DAG expression that will generate the same value by right shifting.
16492 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
16493   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16494   if (!C)
16495     return SDValue();
16496 
16497   // Avoid division by zero.
16498   if (C->isNullValue())
16499     return SDValue();
16500 
16501   std::vector<SDNode *> Built;
16502   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
16503 
16504   for (SDNode *N : Built)
16505     AddToWorklist(N);
16506   return S;
16507 }
16508 
16509 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
16510 /// expression that will generate the same value by multiplying by a magic
16511 /// number.
16512 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
16513 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
16514   // when optimising for minimum size, we don't want to expand a div to a mul
16515   // and a shift.
16516   if (DAG.getMachineFunction().getFunction()->optForMinSize())
16517     return SDValue();
16518 
16519   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16520   if (!C)
16521     return SDValue();
16522 
16523   // Avoid division by zero.
16524   if (C->isNullValue())
16525     return SDValue();
16526 
16527   std::vector<SDNode*> Built;
16528   SDValue S =
16529       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
16530 
16531   for (SDNode *N : Built)
16532     AddToWorklist(N);
16533   return S;
16534 }
16535 
16536 /// Determines the LogBase2 value for a non-null input value using the
16537 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
16538 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
16539   EVT VT = V.getValueType();
16540   unsigned EltBits = VT.getScalarSizeInBits();
16541   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
16542   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
16543   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
16544   return LogBase2;
16545 }
16546 
16547 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16548 /// For the reciprocal, we need to find the zero of the function:
16549 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
16550 ///     =>
16551 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
16552 ///     does not require additional intermediate precision]
16553 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
16554   if (Level >= AfterLegalizeDAG)
16555     return SDValue();
16556 
16557   // TODO: Handle half and/or extended types?
16558   EVT VT = Op.getValueType();
16559   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
16560     return SDValue();
16561 
16562   // If estimates are explicitly disabled for this function, we're done.
16563   MachineFunction &MF = DAG.getMachineFunction();
16564   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
16565   if (Enabled == TLI.ReciprocalEstimate::Disabled)
16566     return SDValue();
16567 
16568   // Estimates may be explicitly enabled for this type with a custom number of
16569   // refinement steps.
16570   int Iterations = TLI.getDivRefinementSteps(VT, MF);
16571   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
16572     AddToWorklist(Est.getNode());
16573 
16574     if (Iterations) {
16575       EVT VT = Op.getValueType();
16576       SDLoc DL(Op);
16577       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
16578 
16579       // Newton iterations: Est = Est + Est (1 - Arg * Est)
16580       for (int i = 0; i < Iterations; ++i) {
16581         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
16582         AddToWorklist(NewEst.getNode());
16583 
16584         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
16585         AddToWorklist(NewEst.getNode());
16586 
16587         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
16588         AddToWorklist(NewEst.getNode());
16589 
16590         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
16591         AddToWorklist(Est.getNode());
16592       }
16593     }
16594     return Est;
16595   }
16596 
16597   return SDValue();
16598 }
16599 
16600 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16601 /// For the reciprocal sqrt, we need to find the zero of the function:
16602 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
16603 ///     =>
16604 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
16605 /// As a result, we precompute A/2 prior to the iteration loop.
16606 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
16607                                          unsigned Iterations,
16608                                          SDNodeFlags Flags, bool Reciprocal) {
16609   EVT VT = Arg.getValueType();
16610   SDLoc DL(Arg);
16611   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
16612 
16613   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
16614   // this entire sequence requires only one FP constant.
16615   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
16616   AddToWorklist(HalfArg.getNode());
16617 
16618   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
16619   AddToWorklist(HalfArg.getNode());
16620 
16621   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
16622   for (unsigned i = 0; i < Iterations; ++i) {
16623     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
16624     AddToWorklist(NewEst.getNode());
16625 
16626     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
16627     AddToWorklist(NewEst.getNode());
16628 
16629     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
16630     AddToWorklist(NewEst.getNode());
16631 
16632     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
16633     AddToWorklist(Est.getNode());
16634   }
16635 
16636   // If non-reciprocal square root is requested, multiply the result by Arg.
16637   if (!Reciprocal) {
16638     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
16639     AddToWorklist(Est.getNode());
16640   }
16641 
16642   return Est;
16643 }
16644 
16645 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16646 /// For the reciprocal sqrt, we need to find the zero of the function:
16647 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
16648 ///     =>
16649 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
16650 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
16651                                          unsigned Iterations,
16652                                          SDNodeFlags Flags, bool Reciprocal) {
16653   EVT VT = Arg.getValueType();
16654   SDLoc DL(Arg);
16655   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
16656   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
16657 
16658   // This routine must enter the loop below to work correctly
16659   // when (Reciprocal == false).
16660   assert(Iterations > 0);
16661 
16662   // Newton iterations for reciprocal square root:
16663   // E = (E * -0.5) * ((A * E) * E + -3.0)
16664   for (unsigned i = 0; i < Iterations; ++i) {
16665     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
16666     AddToWorklist(AE.getNode());
16667 
16668     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
16669     AddToWorklist(AEE.getNode());
16670 
16671     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
16672     AddToWorklist(RHS.getNode());
16673 
16674     // When calculating a square root at the last iteration build:
16675     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
16676     // (notice a common subexpression)
16677     SDValue LHS;
16678     if (Reciprocal || (i + 1) < Iterations) {
16679       // RSQRT: LHS = (E * -0.5)
16680       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
16681     } else {
16682       // SQRT: LHS = (A * E) * -0.5
16683       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
16684     }
16685     AddToWorklist(LHS.getNode());
16686 
16687     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
16688     AddToWorklist(Est.getNode());
16689   }
16690 
16691   return Est;
16692 }
16693 
16694 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
16695 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
16696 /// Op can be zero.
16697 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
16698                                            bool Reciprocal) {
16699   if (Level >= AfterLegalizeDAG)
16700     return SDValue();
16701 
16702   // TODO: Handle half and/or extended types?
16703   EVT VT = Op.getValueType();
16704   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
16705     return SDValue();
16706 
16707   // If estimates are explicitly disabled for this function, we're done.
16708   MachineFunction &MF = DAG.getMachineFunction();
16709   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
16710   if (Enabled == TLI.ReciprocalEstimate::Disabled)
16711     return SDValue();
16712 
16713   // Estimates may be explicitly enabled for this type with a custom number of
16714   // refinement steps.
16715   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
16716 
16717   bool UseOneConstNR = false;
16718   if (SDValue Est =
16719       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
16720                           Reciprocal)) {
16721     AddToWorklist(Est.getNode());
16722 
16723     if (Iterations) {
16724       Est = UseOneConstNR
16725             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
16726             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
16727 
16728       if (!Reciprocal) {
16729         // Unfortunately, Est is now NaN if the input was exactly 0.0.
16730         // Select out this case and force the answer to 0.0.
16731         EVT VT = Op.getValueType();
16732         SDLoc DL(Op);
16733 
16734         SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
16735         EVT CCVT = getSetCCResultType(VT);
16736         SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
16737         AddToWorklist(ZeroCmp.getNode());
16738 
16739         Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
16740                           ZeroCmp, FPZero, Est);
16741         AddToWorklist(Est.getNode());
16742       }
16743     }
16744     return Est;
16745   }
16746 
16747   return SDValue();
16748 }
16749 
16750 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
16751   return buildSqrtEstimateImpl(Op, Flags, true);
16752 }
16753 
16754 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
16755   return buildSqrtEstimateImpl(Op, Flags, false);
16756 }
16757 
16758 /// Return true if base is a frame index, which is known not to alias with
16759 /// anything but itself.  Provides base object and offset as results.
16760 static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
16761                            const GlobalValue *&GV, const void *&CV) {
16762   // Assume it is a primitive operation.
16763   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
16764 
16765   // If it's an adding a simple constant then integrate the offset.
16766   if (Base.getOpcode() == ISD::ADD) {
16767     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
16768       Base = Base.getOperand(0);
16769       Offset += C->getSExtValue();
16770     }
16771   }
16772 
16773   // Return the underlying GlobalValue, and update the Offset.  Return false
16774   // for GlobalAddressSDNode since the same GlobalAddress may be represented
16775   // by multiple nodes with different offsets.
16776   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
16777     GV = G->getGlobal();
16778     Offset += G->getOffset();
16779     return false;
16780   }
16781 
16782   // Return the underlying Constant value, and update the Offset.  Return false
16783   // for ConstantSDNodes since the same constant pool entry may be represented
16784   // by multiple nodes with different offsets.
16785   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
16786     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
16787                                          : (const void *)C->getConstVal();
16788     Offset += C->getOffset();
16789     return false;
16790   }
16791   // If it's any of the following then it can't alias with anything but itself.
16792   return isa<FrameIndexSDNode>(Base);
16793 }
16794 
16795 /// Return true if there is any possibility that the two addresses overlap.
16796 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
16797   // If they are the same then they must be aliases.
16798   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
16799 
16800   // If they are both volatile then they cannot be reordered.
16801   if (Op0->isVolatile() && Op1->isVolatile()) return true;
16802 
16803   // If one operation reads from invariant memory, and the other may store, they
16804   // cannot alias. These should really be checking the equivalent of mayWrite,
16805   // but it only matters for memory nodes other than load /store.
16806   if (Op0->isInvariant() && Op1->writeMem())
16807     return false;
16808 
16809   if (Op1->isInvariant() && Op0->writeMem())
16810     return false;
16811 
16812   unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
16813   unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
16814 
16815   // Check for BaseIndexOffset matching.
16816   BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
16817   BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
16818   int64_t PtrDiff;
16819   if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
16820     return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
16821 
16822   // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
16823   // able to calculate their relative offset if at least one arises
16824   // from an alloca. However, these allocas cannot overlap and we
16825   // can infer there is no alias.
16826   if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
16827     if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
16828       MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
16829       // If the base are the same frame index but the we couldn't find a
16830       // constant offset, (indices are different) be conservative.
16831       if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
16832                      !MFI.isFixedObjectIndex(B->getIndex())))
16833         return false;
16834     }
16835 
16836   // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
16837   // modified to use BaseIndexOffset.
16838 
16839   // Gather base node and offset information.
16840   SDValue Base0, Base1;
16841   int64_t Offset0, Offset1;
16842   const GlobalValue *GV0, *GV1;
16843   const void *CV0, *CV1;
16844   bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(),
16845                                       Base0, Offset0, GV0, CV0);
16846   bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(),
16847                                       Base1, Offset1, GV1, CV1);
16848 
16849   // If they have the same base address, then check to see if they overlap.
16850   if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1)))
16851     return !((Offset0 + NumBytes0) <= Offset1 ||
16852              (Offset1 + NumBytes1) <= Offset0);
16853 
16854   // It is possible for different frame indices to alias each other, mostly
16855   // when tail call optimization reuses return address slots for arguments.
16856   // To catch this case, look up the actual index of frame indices to compute
16857   // the real alias relationship.
16858   if (IsFrameIndex0 && IsFrameIndex1) {
16859     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
16860     Offset0 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base0)->getIndex());
16861     Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
16862     return !((Offset0 + NumBytes0) <= Offset1 ||
16863              (Offset1 + NumBytes1) <= Offset0);
16864   }
16865 
16866   // Otherwise, if we know what the bases are, and they aren't identical, then
16867   // we know they cannot alias.
16868   if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1))
16869     return false;
16870 
16871   // If we know required SrcValue1 and SrcValue2 have relatively large alignment
16872   // compared to the size and offset of the access, we may be able to prove they
16873   // do not alias. This check is conservative for now to catch cases created by
16874   // splitting vector types.
16875   int64_t SrcValOffset0 = Op0->getSrcValueOffset();
16876   int64_t SrcValOffset1 = Op1->getSrcValueOffset();
16877   unsigned OrigAlignment0 = Op0->getOriginalAlignment();
16878   unsigned OrigAlignment1 = Op1->getOriginalAlignment();
16879   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
16880       NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
16881     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
16882     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
16883 
16884     // There is no overlap between these relatively aligned accesses of similar
16885     // size. Return no alias.
16886     if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
16887         (OffAlign1 + NumBytes1) <= OffAlign0)
16888       return false;
16889   }
16890 
16891   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
16892                    ? CombinerGlobalAA
16893                    : DAG.getSubtarget().useAA();
16894 #ifndef NDEBUG
16895   if (CombinerAAOnlyFunc.getNumOccurrences() &&
16896       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
16897     UseAA = false;
16898 #endif
16899 
16900   if (UseAA && AA &&
16901       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
16902     // Use alias analysis information.
16903     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
16904     int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
16905     int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
16906     AliasResult AAResult =
16907         AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
16908                                  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
16909                   MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
16910                                  UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
16911     if (AAResult == NoAlias)
16912       return false;
16913   }
16914 
16915   // Otherwise we have to assume they alias.
16916   return true;
16917 }
16918 
16919 /// Walk up chain skipping non-aliasing memory nodes,
16920 /// looking for aliasing nodes and adding them to the Aliases vector.
16921 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
16922                                    SmallVectorImpl<SDValue> &Aliases) {
16923   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
16924   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
16925 
16926   // Get alias information for node.
16927   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
16928 
16929   // Starting off.
16930   Chains.push_back(OriginalChain);
16931   unsigned Depth = 0;
16932 
16933   // Look at each chain and determine if it is an alias.  If so, add it to the
16934   // aliases list.  If not, then continue up the chain looking for the next
16935   // candidate.
16936   while (!Chains.empty()) {
16937     SDValue Chain = Chains.pop_back_val();
16938 
16939     // For TokenFactor nodes, look at each operand and only continue up the
16940     // chain until we reach the depth limit.
16941     //
16942     // FIXME: The depth check could be made to return the last non-aliasing
16943     // chain we found before we hit a tokenfactor rather than the original
16944     // chain.
16945     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
16946       Aliases.clear();
16947       Aliases.push_back(OriginalChain);
16948       return;
16949     }
16950 
16951     // Don't bother if we've been before.
16952     if (!Visited.insert(Chain.getNode()).second)
16953       continue;
16954 
16955     switch (Chain.getOpcode()) {
16956     case ISD::EntryToken:
16957       // Entry token is ideal chain operand, but handled in FindBetterChain.
16958       break;
16959 
16960     case ISD::LOAD:
16961     case ISD::STORE: {
16962       // Get alias information for Chain.
16963       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
16964           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
16965 
16966       // If chain is alias then stop here.
16967       if (!(IsLoad && IsOpLoad) &&
16968           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
16969         Aliases.push_back(Chain);
16970       } else {
16971         // Look further up the chain.
16972         Chains.push_back(Chain.getOperand(0));
16973         ++Depth;
16974       }
16975       break;
16976     }
16977 
16978     case ISD::TokenFactor:
16979       // We have to check each of the operands of the token factor for "small"
16980       // token factors, so we queue them up.  Adding the operands to the queue
16981       // (stack) in reverse order maintains the original order and increases the
16982       // likelihood that getNode will find a matching token factor (CSE.)
16983       if (Chain.getNumOperands() > 16) {
16984         Aliases.push_back(Chain);
16985         break;
16986       }
16987       for (unsigned n = Chain.getNumOperands(); n;)
16988         Chains.push_back(Chain.getOperand(--n));
16989       ++Depth;
16990       break;
16991 
16992     case ISD::CopyFromReg:
16993       // Forward past CopyFromReg.
16994       Chains.push_back(Chain.getOperand(0));
16995       ++Depth;
16996       break;
16997 
16998     default:
16999       // For all other instructions we will just have to take what we can get.
17000       Aliases.push_back(Chain);
17001       break;
17002     }
17003   }
17004 }
17005 
17006 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
17007 /// (aliasing node.)
17008 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
17009   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
17010 
17011   // Accumulate all the aliases to this node.
17012   GatherAllAliases(N, OldChain, Aliases);
17013 
17014   // If no operands then chain to entry token.
17015   if (Aliases.size() == 0)
17016     return DAG.getEntryNode();
17017 
17018   // If a single operand then chain to it.  We don't need to revisit it.
17019   if (Aliases.size() == 1)
17020     return Aliases[0];
17021 
17022   // Construct a custom tailored token factor.
17023   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
17024 }
17025 
17026 // This function tries to collect a bunch of potentially interesting
17027 // nodes to improve the chains of, all at once. This might seem
17028 // redundant, as this function gets called when visiting every store
17029 // node, so why not let the work be done on each store as it's visited?
17030 //
17031 // I believe this is mainly important because MergeConsecutiveStores
17032 // is unable to deal with merging stores of different sizes, so unless
17033 // we improve the chains of all the potential candidates up-front
17034 // before running MergeConsecutiveStores, it might only see some of
17035 // the nodes that will eventually be candidates, and then not be able
17036 // to go from a partially-merged state to the desired final
17037 // fully-merged state.
17038 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
17039   // This holds the base pointer, index, and the offset in bytes from the base
17040   // pointer.
17041   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
17042 
17043   // We must have a base and an offset.
17044   if (!BasePtr.getBase().getNode())
17045     return false;
17046 
17047   // Do not handle stores to undef base pointers.
17048   if (BasePtr.getBase().isUndef())
17049     return false;
17050 
17051   SmallVector<StoreSDNode *, 8> ChainedStores;
17052   ChainedStores.push_back(St);
17053 
17054   // Walk up the chain and look for nodes with offsets from the same
17055   // base pointer. Stop when reaching an instruction with a different kind
17056   // or instruction which has a different base pointer.
17057   StoreSDNode *Index = St;
17058   while (Index) {
17059     // If the chain has more than one use, then we can't reorder the mem ops.
17060     if (Index != St && !SDValue(Index, 0)->hasOneUse())
17061       break;
17062 
17063     if (Index->isVolatile() || Index->isIndexed())
17064       break;
17065 
17066     // Find the base pointer and offset for this memory node.
17067     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
17068 
17069     // Check that the base pointer is the same as the original one.
17070     if (!BasePtr.equalBaseIndex(Ptr, DAG))
17071       break;
17072 
17073     // Walk up the chain to find the next store node, ignoring any
17074     // intermediate loads. Any other kind of node will halt the loop.
17075     SDNode *NextInChain = Index->getChain().getNode();
17076     while (true) {
17077       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
17078         // We found a store node. Use it for the next iteration.
17079         if (STn->isVolatile() || STn->isIndexed()) {
17080           Index = nullptr;
17081           break;
17082         }
17083         ChainedStores.push_back(STn);
17084         Index = STn;
17085         break;
17086       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
17087         NextInChain = Ldn->getChain().getNode();
17088         continue;
17089       } else {
17090         Index = nullptr;
17091         break;
17092       }
17093     } // end while
17094   }
17095 
17096   // At this point, ChainedStores lists all of the Store nodes
17097   // reachable by iterating up through chain nodes matching the above
17098   // conditions.  For each such store identified, try to find an
17099   // earlier chain to attach the store to which won't violate the
17100   // required ordering.
17101   bool MadeChangeToSt = false;
17102   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
17103 
17104   for (StoreSDNode *ChainedStore : ChainedStores) {
17105     SDValue Chain = ChainedStore->getChain();
17106     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
17107 
17108     if (Chain != BetterChain) {
17109       if (ChainedStore == St)
17110         MadeChangeToSt = true;
17111       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
17112     }
17113   }
17114 
17115   // Do all replacements after finding the replacements to make to avoid making
17116   // the chains more complicated by introducing new TokenFactors.
17117   for (auto Replacement : BetterChains)
17118     replaceStoreChain(Replacement.first, Replacement.second);
17119 
17120   return MadeChangeToSt;
17121 }
17122 
17123 /// This is the entry point for the file.
17124 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
17125                            CodeGenOpt::Level OptLevel) {
17126   /// This is the main entry point to this class.
17127   DAGCombiner(*this, AA, OptLevel).Run(Level);
17128 }
17129