1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/SelectionDAG.h"
28 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
29 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/KnownBits.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Target/TargetLowering.h"
41 #include "llvm/Target/TargetOptions.h"
42 #include "llvm/Target/TargetRegisterInfo.h"
43 #include "llvm/Target/TargetSubtargetInfo.h"
44 #include <algorithm>
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "dagcombine"
48 
// Pass-wide statistics, reported when LLVM is built with statistics enabled
// (-stats). Each counter is bumped by the corresponding transformation below.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
55 
56 namespace {
  /// Command-line flag: allow the combiner to consult IR-level alias analysis
  /// (in addition to its own chain-based analysis) when disambiguating
  /// memory operations.
  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner's use of IR alias analysis"));

  /// Command-line flag: allow type-based alias analysis metadata to be used
  /// by the combiner's alias queries. Defaults to on.
  static cl::opt<bool>
    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
               cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
  /// Debug-build-only flag: restrict combiner alias analysis to the named
  /// function, useful for bisecting miscompiles.
  static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
               cl::desc("Only use DAG-combiner alias analysis in this"
                        " function"));
#endif

  /// Hidden option to stress test load slicing, i.e., when this option
  /// is enabled, load slicing bypasses most of its profitability guards.
  static cl::opt<bool>
  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                    cl::desc("Bypass the profitability model of load "
                             "slicing"),
                    cl::init(false));

  /// Command-line flag: permit splitting the index computation away from
  /// indexed loads. Defaults to on.
  static cl::opt<bool>
    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                      cl::desc("DAG combiner may split indexing from loads"));
83 
84 //------------------------------ DAGCombiner ---------------------------------//
85 
  /// The DAG combiner itself: a worklist-driven pass over a SelectionDAG that
  /// repeatedly pops nodes and dispatches them to the opcode-specific visit
  /// routines declared below, replacing nodes with simpler equivalents.
  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    // Legalization stage the combiner is currently running at.
    CombineLevel Level;
    // Codegen optimization level supplied at construction.
    CodeGenOpt::Level OptLevel;
    // True when newly created nodes must use only target-legal operations.
    // NOTE(review): initialized to false by the constructor; presumably
    // updated by Run() for post-legalization invocations -- confirm there.
    bool LegalOperations;
    // True when newly created nodes must use only target-legal types
    // (same initialization caveat as LegalOperations).
    bool LegalTypes;
    // True if the current function is being optimized for size.
    bool ForCodeSize;

    /// \brief Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// \brief Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// \brief Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      // Only push on first insertion; WorklistMap remembers the slot so
      // removeFromWorklist can null it out later in O(1).
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    /// Remove N from the worklist and revisit its (possibly now-dead)
    /// operands before deleting it from the DAG.
    void deleteAndRecombine(SDNode *N);
    /// Delete N (and, transitively, any operands left unused) if it has no
    /// uses; returns true if anything was deleted.
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    /// Apply the replacements recorded in \p TLO to the DAG.
    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    // Size in bits of the widest legal simple value type; computed once in
    // the constructor. NOTE(review): presumably used to bound store merging
    // widths -- confirm at the uses.
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
                         SDValue ExtLoad, const SDLoc &DL,
                         ISD::NodeType ExtType);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitUADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitUSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCE(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertZext(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
                           SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans = true);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue reduceBuildVecToTrunc(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *N);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    /// Match "(X shl/srl V1) & V2" where V2 may not be present.
    bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;
      // Offset from the base ptr.
      int64_t OffsetFromBase;
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);


    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.  LoadedVT returns the type
    /// of the original loaded value.  NarrowLoad returns whether the load would
    /// need to be narrowed in order to match.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
                          bool &NarrowLoad);

    /// Helper function for MergeConsecutiveStores which merges the
    /// component store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for MergeConsecutiveStores. When the
    /// source elements of the consecutive stores are all constants or
    /// all extracted vector elements, try to merge them into one
    /// larger store introducing bitcasts if necessary.  \return True
    /// if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for MergeConsecutiveStores. Stores
    /// that potentially may be merged with St are placed in
    /// StoreNodes.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// candidate stores have indirect dependency through their
    /// operands. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return number of stores that were merged into a merged store (the
    /// affected nodes are stored as a prefix in \p StoreNodes).
    bool MergeConsecutiveStores(StoreSDNode *N);

    /// \brief Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

  public:
    /// Construct a combiner over \p D at the pre-type-legalization level;
    /// Run() is used to (re)run it at a specific CombineLevel.
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
      ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();

      // Scan all simple value types once and remember the widest one the
      // target considers legal.
      MaximumLegalStoreInBits = 0;
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits();
    }

    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      if (LHSTy.isVector())
        return LHSTy;
      auto &DL = DAG.getDataLayout();
      return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
                        : TLI.getPointerTy(DL);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }
  };
545 }
546 
547 
548 namespace {
549 /// This class is a DAGUpdateListener that removes any deleted
550 /// nodes from the worklist.
551 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
552   DAGCombiner &DC;
553 public:
554   explicit WorklistRemover(DAGCombiner &dc)
555     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
556 
557   void NodeDeleted(SDNode *N, SDNode *E) override {
558     DC.removeFromWorklist(N);
559   }
560 };
561 }
562 
563 //===----------------------------------------------------------------------===//
564 //  TargetLowering::DAGCombinerInfo implementation
565 //===----------------------------------------------------------------------===//
566 
567 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
568   ((DAGCombiner*)DC)->AddToWorklist(N);
569 }
570 
571 SDValue TargetLowering::DAGCombinerInfo::
572 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
573   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
574 }
575 
576 SDValue TargetLowering::DAGCombinerInfo::
577 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
578   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
579 }
580 
581 
582 SDValue TargetLowering::DAGCombinerInfo::
583 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
584   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
585 }
586 
587 void TargetLowering::DAGCombinerInfo::
588 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
589   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
590 }
591 
592 //===----------------------------------------------------------------------===//
593 // Helper Functions
594 //===----------------------------------------------------------------------===//
595 
596 void DAGCombiner::deleteAndRecombine(SDNode *N) {
597   removeFromWorklist(N);
598 
599   // If the operands of this node are only used by the node, they will now be
600   // dead. Make sure to re-visit them and recursively delete dead nodes.
601   for (const SDValue &Op : N->ops())
602     // For an operand generating multiple values, one of the values may
603     // become dead allowing further simplification (e.g. split index
604     // arithmetic from an indexed load).
605     if (Op->hasOneUse() || Op->getNumValues() > 1)
606       AddToWorklist(Op.getNode());
607 
608   DAG.DeleteNode(N);
609 }
610 
611 /// Return 1 if we can compute the negated form of the specified expression for
612 /// the same cost as the expression itself, or 2 if we can compute the negated
613 /// form more cheaply than the expression itself.
614 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
615                                const TargetLowering &TLI,
616                                const TargetOptions *Options,
617                                unsigned Depth = 0) {
618   // fneg is removable even if it has multiple uses.
619   if (Op.getOpcode() == ISD::FNEG) return 2;
620 
621   // Don't allow anything with multiple uses.
622   if (!Op.hasOneUse()) return 0;
623 
624   // Don't recurse exponentially.
625   if (Depth > 6) return 0;
626 
627   switch (Op.getOpcode()) {
628   default: return false;
629   case ISD::ConstantFP: {
630     if (!LegalOperations)
631       return 1;
632 
633     // Don't invert constant FP values after legalization unless the target says
634     // the negated constant is legal.
635     EVT VT = Op.getValueType();
636     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
637       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
638   }
639   case ISD::FADD:
640     // FIXME: determine better conditions for this xform.
641     if (!Options->UnsafeFPMath) return 0;
642 
643     // After operation legalization, it might not be legal to create new FSUBs.
644     if (LegalOperations &&
645         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
646       return 0;
647 
648     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
649     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
650                                     Options, Depth + 1))
651       return V;
652     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
653     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
654                               Depth + 1);
655   case ISD::FSUB:
656     // We can't turn -(A-B) into B-A when we honor signed zeros.
657     if (!Options->NoSignedZerosFPMath &&
658         !Op.getNode()->getFlags().hasNoSignedZeros())
659       return 0;
660 
661     // fold (fneg (fsub A, B)) -> (fsub B, A)
662     return 1;
663 
664   case ISD::FMUL:
665   case ISD::FDIV:
666     if (Options->HonorSignDependentRoundingFPMath()) return 0;
667 
668     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
669     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
670                                     Options, Depth + 1))
671       return V;
672 
673     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
674                               Depth + 1);
675 
676   case ISD::FP_EXTEND:
677   case ISD::FP_ROUND:
678   case ISD::FSIN:
679     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
680                               Depth + 1);
681   }
682 }
683 
/// If isNegatibleForFree returns true, return the newly negated expression.
/// This switch must be kept in sync with isNegatibleForFree: every case that
/// function reports as free to negate must be constructible here, otherwise
/// we hit the llvm_unreachable below.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  // Don't allow anything with multiple uses.
  assert(Op.hasOneUse() && "Unknown reuse!");

  // Depth cap mirrors the recursion limit in isNegatibleForFree.
  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  // Carry the original node's fast-math flags onto any rebuilt nodes.
  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    // Negate the constant in place and rematerialize it.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    // FIXME: determine better conditions for this xform.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    assert(!Options.HonorSignDependentRoundingFPMath());

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes through these: negate the operand instead.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
      // As above, but FP_ROUND carries a second (trunc flag) operand that
      // must be preserved on the rebuilt node.
      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));
  }
}
761 
762 // APInts must be the same size for most operations, this helper
763 // function zero extends the shorter of the pair so that they match.
764 // We provide an Offset so that we can create bitwidths that won't overflow.
765 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
766   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
767   LHS = LHS.zextOrSelf(Bits);
768   RHS = RHS.zextOrSelf(Bits);
769 }
770 
771 // Return true if this node is a setcc, or is a select_cc
772 // that selects between the target values used for true and false, making it
773 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
774 // the appropriate nodes based on the type of node we are checking. This
775 // simplifies life a bit for the callers.
776 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
777                                     SDValue &CC) const {
778   if (N.getOpcode() == ISD::SETCC) {
779     LHS = N.getOperand(0);
780     RHS = N.getOperand(1);
781     CC  = N.getOperand(2);
782     return true;
783   }
784 
785   if (N.getOpcode() != ISD::SELECT_CC ||
786       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
787       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
788     return false;
789 
790   if (TLI.getBooleanContents(N.getValueType()) ==
791       TargetLowering::UndefinedBooleanContent)
792     return false;
793 
794   LHS = N.getOperand(0);
795   RHS = N.getOperand(1);
796   CC  = N.getOperand(4);
797   return true;
798 }
799 
800 /// Return true if this is a SetCC-equivalent operation with only one use.
801 /// If this is true, it allows the users to invert the operation for free when
802 /// it is profitable to do so.
803 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
804   SDValue N0, N1, N2;
805   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
806     return true;
807   return false;
808 }
809 
810 // \brief Returns the SDNode if it is a constant float BuildVector
811 // or constant float.
812 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
813   if (isa<ConstantFPSDNode>(N))
814     return N.getNode();
815   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
816     return N.getNode();
817   return nullptr;
818 }
819 
820 // Determines if it is a constant integer or a build vector of constant
821 // integers (and undefs).
822 // Do not permit build vector implicit truncation.
823 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
824   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
825     return !(Const->isOpaque() && NoOpaques);
826   if (N.getOpcode() != ISD::BUILD_VECTOR)
827     return false;
828   unsigned BitWidth = N.getScalarValueSizeInBits();
829   for (const SDValue &Op : N->op_values()) {
830     if (Op.isUndef())
831       continue;
832     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
833     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
834         (Const->isOpaque() && NoOpaques))
835       return false;
836   }
837   return true;
838 }
839 
840 // Determines if it is a constant null integer or a splatted vector of a
841 // constant null integer (with no undefs).
842 // Build vector implicit truncation is not an issue for null values.
843 static bool isNullConstantOrNullSplatConstant(SDValue N) {
844   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
845     return Splat->isNullValue();
846   return false;
847 }
848 
849 // Determines if it is a constant integer of one or a splatted vector of a
850 // constant integer of one (with no undefs).
851 // Do not permit build vector implicit truncation.
852 static bool isOneConstantOrOneSplatConstant(SDValue N) {
853   unsigned BitWidth = N.getScalarValueSizeInBits();
854   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
855     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
856   return false;
857 }
858 
859 // Determines if it is a constant integer of all ones or a splatted vector of a
860 // constant integer of all ones (with no undefs).
861 // Do not permit build vector implicit truncation.
862 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
863   unsigned BitWidth = N.getScalarValueSizeInBits();
864   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
865     return Splat->isAllOnesValue() &&
866            Splat->getAPIntValue().getBitWidth() == BitWidth;
867   return false;
868 }
869 
870 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
871 // undef's.
872 static bool isAnyConstantBuildVector(const SDNode *N) {
873   return ISD::isBuildVectorOfConstantSDNodes(N) ||
874          ISD::isBuildVectorOfConstantFPSDNodes(N);
875 }
876 
877 // Attempt to match a unary predicate against a scalar/splat constant or
878 // every element of a constant BUILD_VECTOR.
879 static bool matchUnaryPredicate(SDValue Op,
880                                 std::function<bool(ConstantSDNode *)> Match) {
881   if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
882     return Match(Cst);
883 
884   if (ISD::BUILD_VECTOR != Op.getOpcode())
885     return false;
886 
887   EVT SVT = Op.getValueType().getScalarType();
888   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
889     auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
890     if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
891       return false;
892   }
893   return true;
894 }
895 
896 // Attempt to match a binary predicate against a pair of scalar/splat constants
897 // or every element of a pair of constant BUILD_VECTORs.
898 static bool matchBinaryPredicate(
899     SDValue LHS, SDValue RHS,
900     std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
901   if (LHS.getValueType() != RHS.getValueType())
902     return false;
903 
904   if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
905     if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
906       return Match(LHSCst, RHSCst);
907 
908   if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
909       ISD::BUILD_VECTOR != RHS.getOpcode())
910     return false;
911 
912   EVT SVT = LHS.getValueType().getScalarType();
913   for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
914     auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
915     auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
916     if (!LHSCst || !RHSCst)
917       return false;
918     if (LHSCst->getValueType(0) != SVT ||
919         LHSCst->getValueType(0) != RHSCst->getValueType(0))
920       return false;
921     if (!Match(LHSCst, RHSCst))
922       return false;
923   }
924   return true;
925 }
926 
/// Reassociate a binary operation to expose constant folding:
///   (op (op x, c1), c2) -> (op x, (op c1, c2))
///   (op (op x, c1), y)  -> (op (op x, y), c1)  [iff (op x, c1) has one use]
/// plus the mirrored patterns with the nested op on N1. Opc is assumed to be
/// an associative and commutative opcode. Returns the reassociated value, or
/// a null SDValue if no transform applies.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        // Both operands are constant but wouldn't fold; give up rather than
        // fall through to the one-use transform below.
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  // Mirror of the above with the nested operation on the RHS.
  if (N1.getOpcode() == Opc) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        // NOTE(review): the new inner node uses SDLoc(N0), not SDLoc(N1) --
        // presumably intentional; confirm the debug-location choice.
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}
972 
/// Replace all NumTo result values of N with the corresponding entries of To,
/// updating worklist bookkeeping. When AddTo is set, the replacement values
/// and their users are pushed onto the worklist for revisiting. Returns
/// SDValue(N, 0) so visit routines can signal "CombineTo was used".
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.1 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        To[0].getNode()->dump(&DAG);
        dbgs() << " and " << NumTo-1 << " other values\n");
  // Each replacement value (when present) must match the type of the result
  // it replaces.
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // RAUW may delete nodes; the remover keeps our worklist consistent.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1006 
/// Apply a replacement computed by TargetLowering (TLO.Old -> TLO.New) to the
/// DAG and keep the combiner worklist in sync.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
1024 
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  // TLO captures the Old->New replacement when a simplification is found.
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
1047 
/// Replace all uses of \p Load with \p ExtLoad, a wider extending load of the
/// same memory: value uses are rerouted through a truncate back to the
/// original type, and chain uses are rerouted to ExtLoad's chain.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Value (result 0) goes through the truncate; chain (result 1) goes to the
  // extending load's chain.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
1064 
/// Promote \p Op to the wider type \p PVT, returning the promoted value or a
/// null SDValue when no suitable promotion exists. \p Replace is set to true
/// when the result is a new extending load whose chain uses must replace the
/// original load's (see ReplaceLoadWithPromotedLoad).
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // Non-extending loads become ZEXTLOAD when that is legal for PVT,
    // otherwise an any-extend load; extending loads keep their kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Promote the asserted value and re-assert at the wider type.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    // NOTE(review): byte-sized constants are sign-extended while other widths
    // are zero-extended -- presumably a materialization-cost heuristic;
    // confirm the intent.
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Fallback: any-extend the value, if that's legal for the promoted type.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1103 
/// Promote \p Op to \p PVT, then sign-extend-in-register from the original
/// type so the value is a correct sign extension. Returns a null SDValue if
/// the promotion is not possible.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  // The result relies on SIGN_EXTEND_INREG being legal at the wider type.
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // If the promotion created a new extending load, reroute the old load's
  // uses (including its chain) to it.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1120 
/// Promote \p Op to \p PVT, then zero-extend-in-register from the original
/// type so the value is a correct zero extension. Returns a null SDValue if
/// the promotion is not possible.
/// NOTE(review): unlike SExtPromoteOperand there is no legality pre-check
/// here -- presumably getZeroExtendInReg lowers to a plain AND mask; confirm.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // If the promotion created a new extending load, reroute the old load's
  // uses (including its chain) to it.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
1134 
1135 /// Promote the specified integer binary operation if the target indicates it is
1136 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1137 /// i32 since i16 instructions are longer.
1138 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1139   if (!LegalOperations)
1140     return SDValue();
1141 
1142   EVT VT = Op.getValueType();
1143   if (VT.isVector() || !VT.isInteger())
1144     return SDValue();
1145 
1146   // If operation type is 'undesirable', e.g. i16 on x86, consider
1147   // promoting it.
1148   unsigned Opc = Op.getOpcode();
1149   if (TLI.isTypeDesirableForOp(Opc, VT))
1150     return SDValue();
1151 
1152   EVT PVT = VT;
1153   // Consult target whether it is a good idea to promote this operation and
1154   // what's the right type to promote it to.
1155   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1156     assert(PVT != VT && "Don't know what type to promote to!");
1157 
1158     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1159 
1160     bool Replace0 = false;
1161     SDValue N0 = Op.getOperand(0);
1162     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1163 
1164     bool Replace1 = false;
1165     SDValue N1 = Op.getOperand(1);
1166     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1167     SDLoc DL(Op);
1168 
1169     SDValue RV =
1170         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1171 
1172     // We are always replacing N0/N1's use in N and only need
1173     // additional replacements if there are additional uses.
1174     Replace0 &= !N0->hasOneUse();
1175     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1176 
1177     // Combine Op here so it is presreved past replacements.
1178     CombineTo(Op.getNode(), RV);
1179 
1180     // If operands have a use ordering, make sur we deal with
1181     // predecessor first.
1182     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1183       std::swap(N0, N1);
1184       std::swap(NN0, NN1);
1185     }
1186 
1187     if (Replace0) {
1188       AddToWorklist(NN0.getNode());
1189       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1190     }
1191     if (Replace1) {
1192       AddToWorklist(NN1.getNode());
1193       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1194     }
1195     return Op;
1196   }
1197   return SDValue();
1198 }
1199 
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // Only the shifted value is promoted; the promotion must preserve the
    // bits the shift observes: arithmetic right shifts need the sign bits,
    // logical right shifts need zeros in the widened bits.
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    // Shift at the wider type, then truncate back to the original type.
    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1252 
1253 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1254   if (!LegalOperations)
1255     return SDValue();
1256 
1257   EVT VT = Op.getValueType();
1258   if (VT.isVector() || !VT.isInteger())
1259     return SDValue();
1260 
1261   // If operation type is 'undesirable', e.g. i16 on x86, consider
1262   // promoting it.
1263   unsigned Opc = Op.getOpcode();
1264   if (TLI.isTypeDesirableForOp(Opc, VT))
1265     return SDValue();
1266 
1267   EVT PVT = VT;
1268   // Consult target whether it is a good idea to promote this operation and
1269   // what's the right type to promote it to.
1270   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1271     assert(PVT != VT && "Don't know what type to promote to!");
1272     // fold (aext (aext x)) -> (aext x)
1273     // fold (aext (zext x)) -> (zext x)
1274     // fold (aext (sext x)) -> (sext x)
1275     DEBUG(dbgs() << "\nPromoting ";
1276           Op.getNode()->dump(&DAG));
1277     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1278   }
1279   return SDValue();
1280 }
1281 
/// Promote an unindexed integer load to a wider type (as an extending load
/// plus a truncate) when the target says the current type is undesirable.
/// Returns true if the load was replaced.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // Same extension-kind selection as PromoteOperand: non-extending loads
    // prefer ZEXTLOAD when legal, else any-extend; extending loads keep
    // their kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Value uses go through the truncate; chain uses to the new load's chain.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1332 
1333 /// \brief Recursively delete a node which has no uses and any operands for
1334 /// which it is the only use.
1335 ///
1336 /// Note that this both deletes the nodes and removes them from the worklist.
1337 /// It also adds any nodes who have had a user deleted to the worklist as they
1338 /// may now have only one use and subject to other combines.
1339 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1340   if (!N->use_empty())
1341     return false;
1342 
1343   SmallSetVector<SDNode *, 16> Nodes;
1344   Nodes.insert(N);
1345   do {
1346     N = Nodes.pop_back_val();
1347     if (!N)
1348       continue;
1349 
1350     if (N->use_empty()) {
1351       for (const SDValue &ChildN : N->op_values())
1352         Nodes.insert(ChildN.getNode());
1353 
1354       removeFromWorklist(N);
1355       DAG.DeleteNode(N);
1356     } else {
1357       AddToWorklist(N);
1358     }
1359   } while (!Nodes.empty());
1360   return true;
1361 }
1362 
1363 //===----------------------------------------------------------------------===//
1364 //  Main DAG Combiner implementation
1365 //===----------------------------------------------------------------------===//
1366 
/// Main combiner driver: seed the worklist with every node in the DAG, then
/// repeatedly pop nodes and try to combine them until the worklist is empty.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // Legalization replaced or deleted N; don't combine it this round.
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // A null return means no combine fired for N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    // Transfer all of N's uses to the replacement value(s).
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1471 
/// Dispatch \p N to the opcode-specific visit routine.
///
/// Returns the replacement value if a combine succeeded, or a null SDValue
/// when no routine exists for the opcode or the routine found nothing.
/// Several related opcodes intentionally share one routine (e.g. SREM/UREM,
/// the min/max family, and ROTL/ROTR) via case fall-through.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCE:             return visitSETCCE(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertZext:         return visitAssertZext(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  // No visitor for this opcode: signal "no combine" to the caller.
  return SDValue();
}
1577 
1578 SDValue DAGCombiner::combine(SDNode *N) {
1579   SDValue RV = visit(N);
1580 
1581   // If nothing happened, try a target-specific DAG combine.
1582   if (!RV.getNode()) {
1583     assert(N->getOpcode() != ISD::DELETED_NODE &&
1584            "Node was deleted but visit returned NULL!");
1585 
1586     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1587         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1588 
1589       // Expose the DAG combiner to the target combiner impls.
1590       TargetLowering::DAGCombinerInfo
1591         DagCombineInfo(DAG, Level, false, this);
1592 
1593       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1594     }
1595   }
1596 
1597   // If nothing happened still, try promoting the operation.
1598   if (!RV.getNode()) {
1599     switch (N->getOpcode()) {
1600     default: break;
1601     case ISD::ADD:
1602     case ISD::SUB:
1603     case ISD::MUL:
1604     case ISD::AND:
1605     case ISD::OR:
1606     case ISD::XOR:
1607       RV = PromoteIntBinOp(SDValue(N, 0));
1608       break;
1609     case ISD::SHL:
1610     case ISD::SRA:
1611     case ISD::SRL:
1612       RV = PromoteIntShiftOp(SDValue(N, 0));
1613       break;
1614     case ISD::SIGN_EXTEND:
1615     case ISD::ZERO_EXTEND:
1616     case ISD::ANY_EXTEND:
1617       RV = PromoteExtend(SDValue(N, 0));
1618       break;
1619     case ISD::LOAD:
1620       if (PromoteLoad(SDValue(N, 0)))
1621         RV = SDValue(N, 0);
1622       break;
1623     }
1624   }
1625 
1626   // If N is a commutative binary node, try eliminate it if the commuted
1627   // version is already present in the DAG.
1628   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1629       N->getNumValues() == 1) {
1630     SDValue N0 = N->getOperand(0);
1631     SDValue N1 = N->getOperand(1);
1632 
1633     // Constant operands are canonicalized to RHS.
1634     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1635       SDValue Ops[] = {N1, N0};
1636       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1637                                             N->getFlags());
1638       if (CSENode)
1639         return SDValue(CSENode, 0);
1640     }
1641   }
1642 
1643   return RV;
1644 }
1645 
1646 /// Given a node, return its input chain if it has one, otherwise return a null
1647 /// sd operand.
1648 static SDValue getInputChainForNode(SDNode *N) {
1649   if (unsigned NumOps = N->getNumOperands()) {
1650     if (N->getOperand(0).getValueType() == MVT::Other)
1651       return N->getOperand(0);
1652     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1653       return N->getOperand(NumOps-1);
1654     for (unsigned i = 1; i < NumOps-1; ++i)
1655       if (N->getOperand(i).getValueType() == MVT::Other)
1656         return N->getOperand(i);
1657   }
1658   return SDValue();
1659 }
1660 
/// Combine a TokenFactor node: drop redundant entry tokens, merge in
/// single-use TokenFactor operands, deduplicate operands, and prune operands
/// that are already reachable through another operand's chain.  Returns the
/// simplified chain, or a null SDValue if nothing changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  // Seed the worklist with each surviving operand; each gets its own index.
  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // NOTE: this local lambda shadows DAGCombiner::AddToWorklist (the member
  // is called above, before this point) for the remainder of the function.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Walk chains breadth-first; the 1024-iteration cap bounds compile time.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops for pruning to be possible.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting
      // another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only operands that were never reached while walking another
        // operand's chain (pruned operands were inserted into SeenChains).
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1819 
1820 /// MERGE_VALUES can always be eliminated.
1821 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1822   WorklistRemover DeadNodes(*this);
1823   // Replacing results may cause a different MERGE_VALUES to suddenly
1824   // be CSE'd with N, and carry its uses with it. Iterate until no
1825   // uses remain, to ensure that the node can be safely deleted.
1826   // First add the users of this node to the work list so that they
1827   // can be tried again once they have new operands.
1828   AddUsersToWorklist(N);
1829   do {
1830     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1831       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1832   } while (!N->use_empty());
1833   deleteAndRecombine(N);
1834   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1835 }
1836 
1837 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1838 /// ConstantSDNode pointer else nullptr.
1839 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1840   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1841   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1842 }
1843 
1844 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1845   auto BinOpcode = BO->getOpcode();
1846   assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1847           BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1848           BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1849           BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1850           BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1851           BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1852           BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1853           BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1854           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1855          "Unexpected binary operator");
1856 
1857   // Bail out if any constants are opaque because we can't constant fold those.
1858   SDValue C1 = BO->getOperand(1);
1859   if (!isConstantOrConstantVector(C1, true) &&
1860       !isConstantFPBuildVectorOrConstantFP(C1))
1861     return SDValue();
1862 
1863   // Don't do this unless the old select is going away. We want to eliminate the
1864   // binary operator, not replace a binop with a select.
1865   // TODO: Handle ISD::SELECT_CC.
1866   SDValue Sel = BO->getOperand(0);
1867   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1868     return SDValue();
1869 
1870   SDValue CT = Sel.getOperand(1);
1871   if (!isConstantOrConstantVector(CT, true) &&
1872       !isConstantFPBuildVectorOrConstantFP(CT))
1873     return SDValue();
1874 
1875   SDValue CF = Sel.getOperand(2);
1876   if (!isConstantOrConstantVector(CF, true) &&
1877       !isConstantFPBuildVectorOrConstantFP(CF))
1878     return SDValue();
1879 
1880   // We have a select-of-constants followed by a binary operator with a
1881   // constant. Eliminate the binop by pulling the constant math into the select.
1882   // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1883   EVT VT = Sel.getValueType();
1884   SDLoc DL(Sel);
1885   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1886   assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
1887           isConstantFPBuildVectorOrConstantFP(NewCT)) &&
1888          "Failed to constant fold a binop with constant operands");
1889 
1890   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1891   assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
1892           isConstantFPBuildVectorOrConstantFP(NewCF)) &&
1893          "Failed to constant fold a binop with constant operands");
1894 
1895   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1896 }
1897 
/// Combine an integer ADD node: constant folding, algebraic identities with
/// SUB, reassociation, conversion to OR when the operands share no bits, and
/// the commutable ADD patterns handled by visitADDLike (tried with both
/// operand orders).
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // Folds that require a non-opaque constant RHS.
  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneConstantOrOneSplatConstant(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // Try the one-sided ADD patterns with the operands in both orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2045 
2046 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2047   bool Masked = false;
2048 
2049   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2050   while (true) {
2051     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2052       V = V.getOperand(0);
2053       continue;
2054     }
2055 
2056     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2057       Masked = true;
2058       V = V.getOperand(0);
2059       continue;
2060     }
2061 
2062     break;
2063   }
2064 
2065   // If this is not a carry, return.
2066   if (V.getResNo() != 1)
2067     return SDValue();
2068 
2069   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2070       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2071     return SDValue();
2072 
2073   // If the result is masked, then no matter what kind of bool it is we can
2074   // return. If it isn't, then we need to make sure the bool type is either 0 or
2075   // 1 and not other values.
2076   if (Masked ||
2077       TLI.getBooleanContents(V.getValueType()) ==
2078           TargetLoweringBase::ZeroOrOneBooleanContent)
2079     return V;
2080 
2081   return SDValue();
2082 }
2083 
/// Try ADD folds that are written for a single operand ordering; visitADD
/// calls this twice with (N0, N1) swapped so each pattern only needs to
/// match one way.  \p LocReference provides the debug location for any new
/// nodes.  Returns the replacement value or a null SDValue.
SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NumSignBits == DestBits means AndOp0 is all-ones or all-zeros.
    if (NumSignBits == DestBits &&
        isOneConstantOrOneSplatConstant(N1->getOperand(1)))
      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
2140 
2141 SDValue DAGCombiner::visitADDC(SDNode *N) {
2142   SDValue N0 = N->getOperand(0);
2143   SDValue N1 = N->getOperand(1);
2144   EVT VT = N0.getValueType();
2145   SDLoc DL(N);
2146 
2147   // If the flag result is dead, turn this into an ADD.
2148   if (!N->hasAnyUseOfValue(1))
2149     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2150                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2151 
2152   // canonicalize constant to RHS.
2153   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2154   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2155   if (N0C && !N1C)
2156     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2157 
2158   // fold (addc x, 0) -> x + no carry out
2159   if (isNullConstant(N1))
2160     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2161                                         DL, MVT::Glue));
2162 
2163   // If it cannot overflow, transform into an add.
2164   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2165     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2166                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2167 
2168   return SDValue();
2169 }
2170 
2171 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2172   SDValue N0 = N->getOperand(0);
2173   SDValue N1 = N->getOperand(1);
2174   EVT VT = N0.getValueType();
2175   if (VT.isVector())
2176     return SDValue();
2177 
2178   EVT CarryVT = N->getValueType(1);
2179   SDLoc DL(N);
2180 
2181   // If the flag result is dead, turn this into an ADD.
2182   if (!N->hasAnyUseOfValue(1))
2183     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2184                      DAG.getUNDEF(CarryVT));
2185 
2186   // canonicalize constant to RHS.
2187   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2188   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2189   if (N0C && !N1C)
2190     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2191 
2192   // fold (uaddo x, 0) -> x + no carry out
2193   if (isNullConstant(N1))
2194     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2195 
2196   // If it cannot overflow, transform into an add.
2197   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2198     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2199                      DAG.getConstant(0, DL, CarryVT));
2200 
2201   if (SDValue Combined = visitUADDOLike(N0, N1, N))
2202     return Combined;
2203 
2204   if (SDValue Combined = visitUADDOLike(N1, N0, N))
2205     return Combined;
2206 
2207   return SDValue();
2208 }
2209 
2210 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2211   auto VT = N0.getValueType();
2212 
2213   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2214   // If Y + 1 cannot overflow.
2215   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2216     SDValue Y = N1.getOperand(0);
2217     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2218     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2219       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2220                          N1.getOperand(2));
2221   }
2222 
2223   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2224   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2225     if (SDValue Carry = getAsCarry(TLI, N1))
2226       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2227                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2228 
2229   return SDValue();
2230 }
2231 
2232 SDValue DAGCombiner::visitADDE(SDNode *N) {
2233   SDValue N0 = N->getOperand(0);
2234   SDValue N1 = N->getOperand(1);
2235   SDValue CarryIn = N->getOperand(2);
2236 
2237   // canonicalize constant to RHS
2238   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2239   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2240   if (N0C && !N1C)
2241     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2242                        N1, N0, CarryIn);
2243 
2244   // fold (adde x, y, false) -> (addc x, y)
2245   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2246     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2247 
2248   return SDValue();
2249 }
2250 
2251 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2252   SDValue N0 = N->getOperand(0);
2253   SDValue N1 = N->getOperand(1);
2254   SDValue CarryIn = N->getOperand(2);
2255   SDLoc DL(N);
2256 
2257   // canonicalize constant to RHS
2258   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2259   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2260   if (N0C && !N1C)
2261     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2262 
2263   // fold (addcarry x, y, false) -> (uaddo x, y)
2264   if (isNullConstant(CarryIn))
2265     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2266 
2267   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2268   if (isNullConstant(N0) && isNullConstant(N1)) {
2269     EVT VT = N0.getValueType();
2270     EVT CarryVT = CarryIn.getValueType();
2271     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2272     AddToWorklist(CarryExt.getNode());
2273     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2274                                     DAG.getConstant(1, DL, VT)),
2275                      DAG.getConstant(0, DL, CarryVT));
2276   }
2277 
2278   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2279     return Combined;
2280 
2281   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2282     return Combined;
2283 
2284   return SDValue();
2285 }
2286 
// Helper for visitADDCARRY: tries the operand-order-sensitive ADDCARRY folds
// with N0/N1 in one particular order (the caller invokes it both ways).
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  // For UADDO, only the arithmetic result (ResNo 0) may be absorbed; absorbing
  // the overflow flag would change semantics.
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   *
   * We are trying to get:
   *   (addcarry X, 0, (addcarry A, B, Z):Carry)
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    /**
     *            (uaddo A, B)
     *             /       \
     *          Carry      Sum
     *            |          \
     *            | (addcarry *, 0, Z)
     *            |       /
     *             \   Carry
     *              |   /
     * (addcarry X, *, *)
     */
    // Match the diamond: the carry-in must come from an (addcarry Sum, 0, Z)
    // whose first operand is the uaddo's arithmetic result.
    if (Y.getOpcode() == ISD::UADDO &&
        CarryIn.getResNo() == 1 &&
        CarryIn.getOpcode() == ISD::ADDCARRY &&
        isNullConstant(CarryIn.getOperand(1)) &&
        CarryIn.getOperand(0) == Y.getValue(0)) {
      // Rebuild the inner add as (addcarry A, B, Z) so the carry chain is
      // linear, then feed its carry-out into the outer addcarry.
      auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
                              Y.getOperand(0), Y.getOperand(1),
                              CarryIn.getOperand(2));
      AddToWorklist(NewY.getNode());
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), N0.getValueType()),
                         NewY.getValue(1));
    }
  }

  return SDValue();
}
2334 
2335 // Since it may not be valid to emit a fold to zero for vector initializers
2336 // check if we can before folding.
2337 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2338                              SelectionDAG &DAG, bool LegalOperations,
2339                              bool LegalTypes) {
2340   if (!VT.isVector())
2341     return DAG.getConstant(0, DL, VT);
2342   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2343     return DAG.getConstant(0, DL, VT);
2344   return SDValue();
2345 }
2346 
// Visit an ISD::SUB node (N0 - N1) and attempt the standard algebraic
// subtraction folds. Returns the replacement value, or an empty SDValue if
// no combine applies. The fold order below is deliberate: cheap identities
// first, then constant canonicalizations, then structural rewrites.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  // Canonicalizing to ADD lets all the ADD combines see this node.
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // Folds for a zero (or zero-splat) LHS, i.e. negation: 0 - X.
  if (isNullConstantOrNullSplatConstant(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstantOrAllOnesSplatConstant(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  // A sign-extended i1 is 0 or -1, so subtracting it equals adding bit 0 of Y.
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}
2494 
2495 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2496   SDValue N0 = N->getOperand(0);
2497   SDValue N1 = N->getOperand(1);
2498   EVT VT = N0.getValueType();
2499   SDLoc DL(N);
2500 
2501   // If the flag result is dead, turn this into an SUB.
2502   if (!N->hasAnyUseOfValue(1))
2503     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2504                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2505 
2506   // fold (subc x, x) -> 0 + no borrow
2507   if (N0 == N1)
2508     return CombineTo(N, DAG.getConstant(0, DL, VT),
2509                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2510 
2511   // fold (subc x, 0) -> x + no borrow
2512   if (isNullConstant(N1))
2513     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2514 
2515   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2516   if (isAllOnesConstant(N0))
2517     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2518                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2519 
2520   return SDValue();
2521 }
2522 
2523 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2524   SDValue N0 = N->getOperand(0);
2525   SDValue N1 = N->getOperand(1);
2526   EVT VT = N0.getValueType();
2527   if (VT.isVector())
2528     return SDValue();
2529 
2530   EVT CarryVT = N->getValueType(1);
2531   SDLoc DL(N);
2532 
2533   // If the flag result is dead, turn this into an SUB.
2534   if (!N->hasAnyUseOfValue(1))
2535     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2536                      DAG.getUNDEF(CarryVT));
2537 
2538   // fold (usubo x, x) -> 0 + no borrow
2539   if (N0 == N1)
2540     return CombineTo(N, DAG.getConstant(0, DL, VT),
2541                      DAG.getConstant(0, DL, CarryVT));
2542 
2543   // fold (usubo x, 0) -> x + no borrow
2544   if (isNullConstant(N1))
2545     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2546 
2547   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2548   if (isAllOnesConstant(N0))
2549     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2550                      DAG.getConstant(0, DL, CarryVT));
2551 
2552   return SDValue();
2553 }
2554 
2555 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2556   SDValue N0 = N->getOperand(0);
2557   SDValue N1 = N->getOperand(1);
2558   SDValue CarryIn = N->getOperand(2);
2559 
2560   // fold (sube x, y, false) -> (subc x, y)
2561   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2562     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2563 
2564   return SDValue();
2565 }
2566 
2567 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2568   SDValue N0 = N->getOperand(0);
2569   SDValue N1 = N->getOperand(1);
2570   SDValue CarryIn = N->getOperand(2);
2571 
2572   // fold (subcarry x, y, false) -> (usubo x, y)
2573   if (isNullConstant(CarryIn))
2574     return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2575 
2576   return SDValue();
2577 }
2578 
// Visit an ISD::MUL node and attempt the standard multiplication folds:
// constant folding, identities (x*0, x*1, x*-1), strength reduction of
// power-of-two multiplies to shifts, and shift/add reassociations.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // For vectors "const" means a splat of a single constant; for scalars it is
  // a plain ConstantSDNode (tracking opacity separately).
  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    // The log2 value must be adapted to the target's shift-amount type.
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    // Only commit to the fold if the shifted constant folds to a constant.
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  return SDValue();
}
2717 
2718 /// Return true if divmod libcall is available.
2719 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2720                                      const TargetLowering &TLI) {
2721   RTLIB::Libcall LC;
2722   EVT NodeType = Node->getValueType(0);
2723   if (!NodeType.isSimple())
2724     return false;
2725   switch (NodeType.getSimpleVT().SimpleTy) {
2726   default: return false; // No libcall for vector types.
2727   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2728   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2729   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2730   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2731   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2732   }
2733 
2734   return TLI.getLibcallName(LC) != nullptr;
2735 }
2736 
/// Issue divrem if both quotient and remainder are needed.
/// Given a DIV or REM node, looks for a sibling node computing the other half
/// of the same division and merges both into a single [SU]DIVREM. Returns the
/// combined node (callers take the appropriate result value), or an empty
/// SDValue if the combine does not apply.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Scan every user of the dividend looking for div/rem nodes over the same
  // operand pair; each match is rewritten in terms of one shared DIVREM.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  // Note: the iterator is advanced before the body runs because CombineTo may
  // mutate the use list.
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE;) {
    SDNode *User = *UI++;
    if (User == Node || User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          // First match: create the shared DIVREM node.
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          // A DIVREM over these operands already exists; reuse it.
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Quotient users take result 0, remainder users take result 1.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
2806 
2807 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
2808   SDValue N0 = N->getOperand(0);
2809   SDValue N1 = N->getOperand(1);
2810   EVT VT = N->getValueType(0);
2811   SDLoc DL(N);
2812 
2813   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2814     return DAG.getUNDEF(VT);
2815 
2816   // undef / X -> 0
2817   // undef % X -> 0
2818   if (N0.isUndef())
2819     return DAG.getConstant(0, DL, VT);
2820 
2821   return SDValue();
2822 }
2823 
// Visit an ISD::SDIV node: constant folds, identities, strength reduction of
// signed division by a power of two, and conversion to SDIVREM when a
// matching SREM exists.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  // Covers divisors that are powers of two or negations of powers of two.
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
                                    (-N1C->getAPIntValue()).isPowerOf2())) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, DL, VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    // This biases a negative dividend so the arithmetic shift below rounds
    // toward zero, matching signed-division semantics.
    SDValue SRL =
        DAG.getNode(ISD::SRL, DL, VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
                  DAG.getConstant(lg2, DL,
                                  getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
2917 
// Visit an ISD::UDIV node: constant folds, strength reduction of unsigned
// division by a power of two (including (shl pow2, y) divisors), and
// conversion to UDIVREM when a matching UREM exists.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    // Adapt the log2 value to the target's shift-amount type.
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
2988 
// handles ISD::SREM and ISD::UREM
// Folds: constant evaluation, srem->urem strength reduction, urem-by-pow2 as
// a mask, X%C rewritten via the division-by-constant expansion, and
// conversion to [SU]DIVREM when a matching div exists.
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // To avoid mangling nodes, this simplification requires that the combine()
  // call for the speculative DIV must not cause a DIVREM conversion.  We guard
  // against this by skipping the simplification if isIntDivCheap().  When
  // div is not cheap, combine will not return a DIVREM.  Regardless,
  // checking cheapness here makes sense since the simplification results in
  // fatter code.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    // Build a speculative division node and only keep the rewrite if
    // combine() can simplify it into something cheaper than a real div.
    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
             (OptimizedDiv.getOpcode() != ISD::SDIVREM));
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem / udiv, urem -> udivrem
  // The remainder is result value 1 of the DIVREM node.
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
3064 
3065 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3066   SDValue N0 = N->getOperand(0);
3067   SDValue N1 = N->getOperand(1);
3068   EVT VT = N->getValueType(0);
3069   SDLoc DL(N);
3070 
3071   // fold (mulhs x, 0) -> 0
3072   if (isNullConstant(N1))
3073     return N1;
3074   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3075   if (isOneConstant(N1)) {
3076     SDLoc DL(N);
3077     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3078                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3079                                        getShiftAmountTy(N0.getValueType())));
3080   }
3081   // fold (mulhs x, undef) -> 0
3082   if (N0.isUndef() || N1.isUndef())
3083     return DAG.getConstant(0, SDLoc(N), VT);
3084 
3085   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3086   // plus a shift.
3087   if (VT.isSimple() && !VT.isVector()) {
3088     MVT Simple = VT.getSimpleVT();
3089     unsigned SimpleSize = Simple.getSizeInBits();
3090     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3091     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3092       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3093       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3094       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3095       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3096             DAG.getConstant(SimpleSize, DL,
3097                             getShiftAmountTy(N1.getValueType())));
3098       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3099     }
3100   }
3101 
3102   return SDValue();
3103 }
3104 
3105 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3106   SDValue N0 = N->getOperand(0);
3107   SDValue N1 = N->getOperand(1);
3108   EVT VT = N->getValueType(0);
3109   SDLoc DL(N);
3110 
3111   // fold (mulhu x, 0) -> 0
3112   if (isNullConstant(N1))
3113     return N1;
3114   // fold (mulhu x, 1) -> 0
3115   if (isOneConstant(N1))
3116     return DAG.getConstant(0, DL, N0.getValueType());
3117   // fold (mulhu x, undef) -> 0
3118   if (N0.isUndef() || N1.isUndef())
3119     return DAG.getConstant(0, DL, VT);
3120 
3121   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3122   // plus a shift.
3123   if (VT.isSimple() && !VT.isVector()) {
3124     MVT Simple = VT.getSimpleVT();
3125     unsigned SimpleSize = Simple.getSizeInBits();
3126     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3127     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3128       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3129       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3130       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3131       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3132             DAG.getConstant(SimpleSize, DL,
3133                             getShiftAmountTy(N1.getValueType())));
3134       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3135     }
3136   }
3137 
3138   return SDValue();
3139 }
3140 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// the replacement value if a simplification was made, or a null SDValue
/// otherwise. (Despite older wording, this does not return a bool.)
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    // Replacing both results with the low-half node is safe because the high
    // result has no users.
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       // NOTE(review): this uses isOperationLegal while the low-half case
       // above accepts Custom as well — confirm whether the asymmetry is
       // intentional.
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  // Speculatively build the single-result node and run the combiner on it; if
  // it folds to something new (and legal), use that for both results.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
3191 
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  // If only one half of the result is used, reduce to MUL or MULHS.
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If a type twice as wide is legal, transform the smul_lohi into a single
  // wider multiply: sign-extend both operands, multiply, then split the
  // double-width product into its two halves.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result 1) by shifting the wide product down.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result 0) by truncating the wide product.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
3222 
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  // If only one half of the result is used, reduce to MUL or MULHU.
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If a type twice as wide is legal, transform the umul_lohi into a single
  // wider multiply: zero-extend both operands, multiply, then split the
  // double-width product into its two halves.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result 1) by shifting the wide product down.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result 0) by truncating the wide product.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
3253 
3254 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3255   // (smulo x, 2) -> (saddo x, x)
3256   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3257     if (C2->getAPIntValue() == 2)
3258       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3259                          N->getOperand(0), N->getOperand(0));
3260 
3261   return SDValue();
3262 }
3263 
3264 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3265   // (umulo x, 2) -> (uaddo x, x)
3266   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3267     if (C2->getAPIntValue() == 2)
3268       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3269                          N->getOperand(0), N->getOperand(0));
3270 
3271   return SDValue();
3272 }
3273 
3274 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3275   SDValue N0 = N->getOperand(0);
3276   SDValue N1 = N->getOperand(1);
3277   EVT VT = N0.getValueType();
3278 
3279   // fold vector ops
3280   if (VT.isVector())
3281     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3282       return FoldedVOp;
3283 
3284   // fold operation with constant operands.
3285   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3286   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3287   if (N0C && N1C)
3288     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3289 
3290   // canonicalize constant to RHS
3291   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3292      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3293     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3294 
3295   return SDValue();
3296 }
3297 
/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it by hoisting the common inner operation above the logic op.
/// Returns the simplified node, or a null SDValue if no fold applies.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      // Both inner operands must have the same type, and the logic op must be
      // legal (or pre-legalization) at that narrower/wider type.
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // For XOR the shared operand must become zero in the result shuffle,
      // because (xor C, C) == 0.
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    SVN0->getMask());
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // Same idea as above, but now the shared operand is the first shuffle
      // input instead of the second.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    SVN0->getMask());
      }
    }
  }

  return SDValue();
}
3450 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
/// IsAnd selects between the AND and OR forms of each fold. Returns the
/// replacement value, or a null SDValue if no fold applies.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  // Both operands must be setcc (or setcc-like select_cc) nodes.
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares share the same RHS constant and predicate: merge the two
  // LHS values with a single OR/AND and compare once.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullConstantOrNullSplatConstant(LR);
    bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // X+1 wraps 0 -> 1 and -1 -> 0, so "not 0 and not -1" becomes "X+1 >=u 2".
  if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  // Swapping a setcc's operands requires swapping its predicate too.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
3569 
3570 /// This contains all DAGCombine rules which reduce two values combined by
3571 /// an And operation to a single value. This makes them reusable in the context
3572 /// of visitSELECT(). Rules involving constants are not included as
3573 /// visitSELECT() already handles those cases.
3574 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3575   EVT VT = N1.getValueType();
3576   SDLoc DL(N);
3577 
3578   // fold (and x, undef) -> 0
3579   if (N0.isUndef() || N1.isUndef())
3580     return DAG.getConstant(0, DL, VT);
3581 
3582   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3583     return V;
3584 
3585   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3586       VT.getSizeInBits() <= 64) {
3587     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3588       APInt ADDC = ADDI->getAPIntValue();
3589       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3590         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3591         // immediate for an add, but it is legal if its top c2 bits are set,
3592         // transform the ADD so the immediate doesn't need to be materialized
3593         // in a register.
3594         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3595           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3596                                              SRLI->getZExtValue());
3597           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3598             ADDC |= Mask;
3599             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3600               SDLoc DL0(N0);
3601               SDValue NewAdd =
3602                 DAG.getNode(ISD::ADD, DL0, VT,
3603                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3604               CombineTo(N0.getNode(), NewAdd);
3605               // Return N so it doesn't get rechecked!
3606               return SDValue(N, 0);
3607             }
3608           }
3609         }
3610       }
3611     }
3612   }
3613 
3614   // Reduce bit extract of low half of an integer to the narrower type.
3615   // (and (srl i64:x, K), KMask) ->
3616   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3617   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3618     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3619       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3620         unsigned Size = VT.getSizeInBits();
3621         const APInt &AndMask = CAnd->getAPIntValue();
3622         unsigned ShiftBits = CShift->getZExtValue();
3623 
3624         // Bail out, this node will probably disappear anyway.
3625         if (ShiftBits == 0)
3626           return SDValue();
3627 
3628         unsigned MaskBits = AndMask.countTrailingOnes();
3629         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3630 
3631         if (AndMask.isMask() &&
3632             // Required bits must not span the two halves of the integer and
3633             // must fit in the half size type.
3634             (ShiftBits + MaskBits <= Size / 2) &&
3635             TLI.isNarrowingProfitable(VT, HalfVT) &&
3636             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3637             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3638             TLI.isTruncateFree(VT, HalfVT) &&
3639             TLI.isZExtFree(HalfVT, VT)) {
3640           // The isNarrowingProfitable is to avoid regressions on PPC and
3641           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3642           // on downstream users of this. Those patterns could probably be
3643           // extended to handle extensions mixed in.
3644 
3645           SDValue SL(N0);
3646           assert(MaskBits <= Size);
3647 
3648           // Extracting the highest bit of the low half.
3649           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3650           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3651                                       N0.getOperand(0));
3652 
3653           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3654           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3655           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3656           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3657           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3658         }
3659       }
3660     }
3661   }
3662 
3663   return SDValue();
3664 }
3665 
3666 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3667                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3668                                    bool &NarrowLoad) {
3669   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
3670 
3671   if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
3672     return false;
3673 
3674   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3675   LoadedVT = LoadN->getMemoryVT();
3676 
3677   if (ExtVT == LoadedVT &&
3678       (!LegalOperations ||
3679        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3680     // ZEXTLOAD will match without needing to change the size of the value being
3681     // loaded.
3682     NarrowLoad = false;
3683     return true;
3684   }
3685 
3686   // Do not change the width of a volatile load.
3687   if (LoadN->isVolatile())
3688     return false;
3689 
3690   // Do not generate loads of non-round integer types since these can
3691   // be expensive (and would be wrong if the type is not byte sized).
3692   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3693     return false;
3694 
3695   if (LegalOperations &&
3696       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3697     return false;
3698 
3699   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3700     return false;
3701 
3702   NarrowLoad = true;
3703   return true;
3704 }
3705 
3706 SDValue DAGCombiner::visitAND(SDNode *N) {
3707   SDValue N0 = N->getOperand(0);
3708   SDValue N1 = N->getOperand(1);
3709   EVT VT = N1.getValueType();
3710 
3711   // x & x --> x
3712   if (N0 == N1)
3713     return N0;
3714 
3715   // fold vector ops
3716   if (VT.isVector()) {
3717     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3718       return FoldedVOp;
3719 
3720     // fold (and x, 0) -> 0, vector edition
3721     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3722       // do not return N0, because undef node may exist in N0
3723       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3724                              SDLoc(N), N0.getValueType());
3725     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3726       // do not return N1, because undef node may exist in N1
3727       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3728                              SDLoc(N), N1.getValueType());
3729 
3730     // fold (and x, -1) -> x, vector edition
3731     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3732       return N1;
3733     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3734       return N0;
3735   }
3736 
3737   // fold (and c1, c2) -> c1&c2
3738   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3739   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3740   if (N0C && N1C && !N1C->isOpaque())
3741     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3742   // canonicalize constant to RHS
3743   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3744      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3745     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3746   // fold (and x, -1) -> x
3747   if (isAllOnesConstant(N1))
3748     return N0;
3749   // if (and x, c) is known to be zero, return 0
3750   unsigned BitWidth = VT.getScalarSizeInBits();
3751   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3752                                    APInt::getAllOnesValue(BitWidth)))
3753     return DAG.getConstant(0, SDLoc(N), VT);
3754 
3755   if (SDValue NewSel = foldBinOpIntoSelect(N))
3756     return NewSel;
3757 
3758   // reassociate and
3759   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3760     return RAND;
3761   // fold (and (or x, C), D) -> D if (C & D) == D
3762   if (N1C && N0.getOpcode() == ISD::OR)
3763     if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
3764       if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue()))
3765         return N1;
3766   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3767   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3768     SDValue N0Op0 = N0.getOperand(0);
3769     APInt Mask = ~N1C->getAPIntValue();
3770     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
3771     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
3772       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
3773                                  N0.getValueType(), N0Op0);
3774 
3775       // Replace uses of the AND with uses of the Zero extend node.
3776       CombineTo(N, Zext);
3777 
3778       // We actually want to replace all uses of the any_extend with the
3779       // zero_extend, to avoid duplicating things.  This will later cause this
3780       // AND to be folded.
3781       CombineTo(N0.getNode(), Zext);
3782       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3783     }
3784   }
3785   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
3786   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
3787   // already be zero by virtue of the width of the base type of the load.
3788   //
3789   // the 'X' node here can either be nothing or an extract_vector_elt to catch
3790   // more cases.
3791   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3792        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
3793        N0.getOperand(0).getOpcode() == ISD::LOAD &&
3794        N0.getOperand(0).getResNo() == 0) ||
3795       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
3796     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
3797                                          N0 : N0.getOperand(0) );
3798 
3799     // Get the constant (if applicable) the zero'th operand is being ANDed with.
3800     // This can be a pure constant or a vector splat, in which case we treat the
3801     // vector as a scalar and use the splat value.
3802     APInt Constant = APInt::getNullValue(1);
3803     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
3804       Constant = C->getAPIntValue();
3805     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
3806       APInt SplatValue, SplatUndef;
3807       unsigned SplatBitSize;
3808       bool HasAnyUndefs;
3809       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
3810                                              SplatBitSize, HasAnyUndefs);
3811       if (IsSplat) {
3812         // Undef bits can contribute to a possible optimisation if set, so
3813         // set them.
3814         SplatValue |= SplatUndef;
3815 
3816         // The splat value may be something like "0x00FFFFFF", which means 0 for
3817         // the first vector value and FF for the rest, repeating. We need a mask
3818         // that will apply equally to all members of the vector, so AND all the
3819         // lanes of the constant together.
3820         EVT VT = Vector->getValueType(0);
3821         unsigned BitWidth = VT.getScalarSizeInBits();
3822 
3823         // If the splat value has been compressed to a bitlength lower
3824         // than the size of the vector lane, we need to re-expand it to
3825         // the lane size.
3826         if (BitWidth > SplatBitSize)
3827           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
3828                SplatBitSize < BitWidth;
3829                SplatBitSize = SplatBitSize * 2)
3830             SplatValue |= SplatValue.shl(SplatBitSize);
3831 
3832         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
3833         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
3834         if (SplatBitSize % BitWidth == 0) {
3835           Constant = APInt::getAllOnesValue(BitWidth);
3836           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
3837             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
3838         }
3839       }
3840     }
3841 
3842     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
3843     // actually legal and isn't going to get expanded, else this is a false
3844     // optimisation.
3845     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
3846                                                     Load->getValueType(0),
3847                                                     Load->getMemoryVT());
3848 
3849     // Resize the constant to the same size as the original memory access before
3850     // extension. If it is still the AllOnesValue then this AND is completely
3851     // unneeded.
3852     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
3853 
3854     bool B;
3855     switch (Load->getExtensionType()) {
3856     default: B = false; break;
3857     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
3858     case ISD::ZEXTLOAD:
3859     case ISD::NON_EXTLOAD: B = true; break;
3860     }
3861 
3862     if (B && Constant.isAllOnesValue()) {
3863       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
3864       // preserve semantics once we get rid of the AND.
3865       SDValue NewLoad(Load, 0);
3866 
3867       // Fold the AND away. NewLoad may get replaced immediately.
3868       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
3869 
3870       if (Load->getExtensionType() == ISD::EXTLOAD) {
3871         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
3872                               Load->getValueType(0), SDLoc(Load),
3873                               Load->getChain(), Load->getBasePtr(),
3874                               Load->getOffset(), Load->getMemoryVT(),
3875                               Load->getMemOperand());
3876         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
3877         if (Load->getNumValues() == 3) {
3878           // PRE/POST_INC loads have 3 values.
3879           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
3880                            NewLoad.getValue(2) };
3881           CombineTo(Load, To, 3, true);
3882         } else {
3883           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
3884         }
3885       }
3886 
3887       return SDValue(N, 0); // Return N so it doesn't get rechecked!
3888     }
3889   }
3890 
3891   // fold (and (load x), 255) -> (zextload x, i8)
3892   // fold (and (extload x, i16), 255) -> (zextload x, i8)
3893   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
3894   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
3895                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
3896                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
3897     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
3898     LoadSDNode *LN0 = HasAnyExt
3899       ? cast<LoadSDNode>(N0.getOperand(0))
3900       : cast<LoadSDNode>(N0);
3901     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
3902         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
3903       auto NarrowLoad = false;
3904       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
3905       EVT ExtVT, LoadedVT;
3906       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
3907                            NarrowLoad)) {
3908         if (!NarrowLoad) {
3909           SDValue NewLoad =
3910             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3911                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
3912                            LN0->getMemOperand());
3913           AddToWorklist(N);
3914           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
3915           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3916         } else {
3917           EVT PtrType = LN0->getOperand(1).getValueType();
3918 
3919           unsigned Alignment = LN0->getAlignment();
3920           SDValue NewPtr = LN0->getBasePtr();
3921 
3922           // For big endian targets, we need to add an offset to the pointer
3923           // to load the correct bytes.  For little endian systems, we merely
3924           // need to read fewer bytes from the same pointer.
3925           if (DAG.getDataLayout().isBigEndian()) {
3926             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3927             unsigned EVTStoreBytes = ExtVT.getStoreSize();
3928             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3929             SDLoc DL(LN0);
3930             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
3931                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
3932             Alignment = MinAlign(Alignment, PtrOff);
3933           }
3934 
3935           AddToWorklist(NewPtr.getNode());
3936 
3937           SDValue Load = DAG.getExtLoad(
3938               ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
3939               LN0->getPointerInfo(), ExtVT, Alignment,
3940               LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
3941           AddToWorklist(N);
3942           CombineTo(LN0, Load, Load.getValue(1));
3943           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3944         }
3945       }
3946     }
3947   }
3948 
3949   if (SDValue Combined = visitANDLike(N0, N1, N))
3950     return Combined;
3951 
3952   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
3953   if (N0.getOpcode() == N1.getOpcode())
3954     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3955       return Tmp;
3956 
3957   // Masking the negated extension of a boolean is just the zero-extended
3958   // boolean:
3959   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
3960   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
3961   //
3962   // Note: the SimplifyDemandedBits fold below can make an information-losing
3963   // transform, and then we have no way to find this better fold.
3964   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
3965     if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
3966       SDValue SubRHS = N0.getOperand(1);
3967       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
3968           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3969         return SubRHS;
3970       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
3971           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3972         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
3973     }
3974   }
3975 
3976   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
3977   // fold (and (sra)) -> (and (srl)) when possible.
3978   if (SimplifyDemandedBits(SDValue(N, 0)))
3979     return SDValue(N, 0);
3980 
3981   // fold (zext_inreg (extload x)) -> (zextload x)
3982   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
3983     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3984     EVT MemVT = LN0->getMemoryVT();
3985     // If we zero all the possible extended bits, then we can turn this into
3986     // a zextload if we are running before legalize or the operation is legal.
3987     unsigned BitWidth = N1.getScalarValueSizeInBits();
3988     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3989                            BitWidth - MemVT.getScalarSizeInBits())) &&
3990         ((!LegalOperations && !LN0->isVolatile()) ||
3991          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3992       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3993                                        LN0->getChain(), LN0->getBasePtr(),
3994                                        MemVT, LN0->getMemOperand());
3995       AddToWorklist(N);
3996       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3997       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3998     }
3999   }
4000   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4001   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4002       N0.hasOneUse()) {
4003     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4004     EVT MemVT = LN0->getMemoryVT();
4005     // If we zero all the possible extended bits, then we can turn this into
4006     // a zextload if we are running before legalize or the operation is legal.
4007     unsigned BitWidth = N1.getScalarValueSizeInBits();
4008     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4009                            BitWidth - MemVT.getScalarSizeInBits())) &&
4010         ((!LegalOperations && !LN0->isVolatile()) ||
4011          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4012       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4013                                        LN0->getChain(), LN0->getBasePtr(),
4014                                        MemVT, LN0->getMemOperand());
4015       AddToWorklist(N);
4016       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4017       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4018     }
4019   }
4020   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4021   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4022     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4023                                            N0.getOperand(1), false))
4024       return BSwap;
4025   }
4026 
4027   return SDValue();
4028 }
4029 
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// Recognizes the low-halfword byte-swap idiom on an i16/i32/i64 value,
/// where either shift may additionally be wrapped in a masking AND
/// (0xFF00 around the SHL side, 0xFF around the SRL side), and rewrites
/// it as a BSWAP followed by a right shift that brings the swapped
/// halfword into the low 16 bits.
///
/// \param N  The OR (or AND, when called from visitAND) node being combined;
///           supplies the result type and debug location.
/// \param N0,N1  The two operands being OR'd together.
/// \param DemandHighBits  When true, bits above the low halfword of the
///           result are observed by users, so the match must prove they
///           end up zero.
/// \returns the replacement value, or an empty SDValue on no match.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  // Canonicalize so that an AND wrapping an SRL lands in N1 and an AND
  // wrapping an SHL lands in N0; the flags record which masks were seen.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  // Strip a single-use outer mask of exactly 0xFF00 from the SHL side.
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  // Strip a single-use outer mask of exactly 0xFF from the SRL side.
  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After stripping the masks we must be left with exactly one SHL and one
  // SRL, each single-use and each shifting by a constant 8.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  // i.e. the masks may also appear *inside* the shifts; accept that form
  // too, but only if we did not already consume an outer mask on that side.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both shifts must originate from the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Emit the bswap; for types wider than i16 shift the swapped halfword
  // down into the low 16 bits.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
4136 
4137 /// Return true if the specified node is an element that makes up a 32-bit
4138 /// packed halfword byteswap.
4139 /// ((x & 0x000000ff) << 8) |
4140 /// ((x & 0x0000ff00) >> 8) |
4141 /// ((x & 0x00ff0000) << 8) |
4142 /// ((x & 0xff000000) >> 8)
4143 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4144   if (!N.getNode()->hasOneUse())
4145     return false;
4146 
4147   unsigned Opc = N.getOpcode();
4148   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4149     return false;
4150 
4151   SDValue N0 = N.getOperand(0);
4152   unsigned Opc0 = N0.getOpcode();
4153   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4154     return false;
4155 
4156   ConstantSDNode *N1C = nullptr;
4157   // SHL or SRL: look upstream for AND mask operand
4158   if (Opc == ISD::AND)
4159     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4160   else if (Opc0 == ISD::AND)
4161     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4162   if (!N1C)
4163     return false;
4164 
4165   unsigned MaskByteOffset;
4166   switch (N1C->getZExtValue()) {
4167   default:
4168     return false;
4169   case 0xFF:       MaskByteOffset = 0; break;
4170   case 0xFF00:     MaskByteOffset = 1; break;
4171   case 0xFF0000:   MaskByteOffset = 2; break;
4172   case 0xFF000000: MaskByteOffset = 3; break;
4173   }
4174 
4175   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4176   if (Opc == ISD::AND) {
4177     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4178       // (x >> 8) & 0xff
4179       // (x >> 8) & 0xff0000
4180       if (Opc0 != ISD::SRL)
4181         return false;
4182       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4183       if (!C || C->getZExtValue() != 8)
4184         return false;
4185     } else {
4186       // (x << 8) & 0xff00
4187       // (x << 8) & 0xff000000
4188       if (Opc0 != ISD::SHL)
4189         return false;
4190       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4191       if (!C || C->getZExtValue() != 8)
4192         return false;
4193     }
4194   } else if (Opc == ISD::SHL) {
4195     // (x & 0xff) << 8
4196     // (x & 0xff0000) << 8
4197     if (MaskByteOffset != 0 && MaskByteOffset != 2)
4198       return false;
4199     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4200     if (!C || C->getZExtValue() != 8)
4201       return false;
4202   } else { // Opc == ISD::SRL
4203     // (x & 0xff00) >> 8
4204     // (x & 0xff000000) >> 8
4205     if (MaskByteOffset != 1 && MaskByteOffset != 3)
4206       return false;
4207     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4208     if (!C || C->getZExtValue() != 8)
4209       return false;
4210   }
4211 
4212   if (Parts[MaskByteOffset])
4213     return false;
4214 
4215   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4216   return true;
4217 }
4218 
4219 /// Match a 32-bit packed halfword bswap. That is
4220 /// ((x & 0x000000ff) << 8) |
4221 /// ((x & 0x0000ff00) >> 8) |
4222 /// ((x & 0x00ff0000) << 8) |
4223 /// ((x & 0xff000000) >> 8)
4224 /// => (rotl (bswap x), 16)
4225 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4226   if (!LegalOperations)
4227     return SDValue();
4228 
4229   EVT VT = N->getValueType(0);
4230   if (VT != MVT::i32)
4231     return SDValue();
4232   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4233     return SDValue();
4234 
4235   // Look for either
4236   // (or (or (and), (and)), (or (and), (and)))
4237   // (or (or (or (and), (and)), (and)), (and))
4238   if (N0.getOpcode() != ISD::OR)
4239     return SDValue();
4240   SDValue N00 = N0.getOperand(0);
4241   SDValue N01 = N0.getOperand(1);
4242   SDNode *Parts[4] = {};
4243 
4244   if (N1.getOpcode() == ISD::OR &&
4245       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4246     // (or (or (and), (and)), (or (and), (and)))
4247     if (!isBSwapHWordElement(N00, Parts))
4248       return SDValue();
4249 
4250     if (!isBSwapHWordElement(N01, Parts))
4251       return SDValue();
4252     SDValue N10 = N1.getOperand(0);
4253     if (!isBSwapHWordElement(N10, Parts))
4254       return SDValue();
4255     SDValue N11 = N1.getOperand(1);
4256     if (!isBSwapHWordElement(N11, Parts))
4257       return SDValue();
4258   } else {
4259     // (or (or (or (and), (and)), (and)), (and))
4260     if (!isBSwapHWordElement(N1, Parts))
4261       return SDValue();
4262     if (!isBSwapHWordElement(N01, Parts))
4263       return SDValue();
4264     if (N00.getOpcode() != ISD::OR)
4265       return SDValue();
4266     SDValue N000 = N00.getOperand(0);
4267     if (!isBSwapHWordElement(N000, Parts))
4268       return SDValue();
4269     SDValue N001 = N00.getOperand(1);
4270     if (!isBSwapHWordElement(N001, Parts))
4271       return SDValue();
4272   }
4273 
4274   // Make sure the parts are all coming from the same node.
4275   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4276     return SDValue();
4277 
4278   SDLoc DL(N);
4279   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4280                               SDValue(Parts[0], 0));
4281 
4282   // Result of the bswap should be rotated by 16. If it's not legal, then
4283   // do  (x << 16) | (x >> 16).
4284   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4285   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4286     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4287   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4288     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4289   return DAG.getNode(ISD::OR, DL, VT,
4290                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4291                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4292 }
4293 
4294 /// This contains all DAGCombine rules which reduce two values combined by
4295 /// an Or operation to a single value \see visitANDLike().
4296 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4297   EVT VT = N1.getValueType();
4298   SDLoc DL(N);
4299 
4300   // fold (or x, undef) -> -1
4301   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4302     return DAG.getAllOnesConstant(DL, VT);
4303 
4304   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4305     return V;
4306 
4307   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4308   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4309       // Don't increase # computations.
4310       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4311     // We can only do this xform if we know that bits from X that are set in C2
4312     // but not in C1 are already zero.  Likewise for Y.
4313     if (const ConstantSDNode *N0O1C =
4314         getAsNonOpaqueConstant(N0.getOperand(1))) {
4315       if (const ConstantSDNode *N1O1C =
4316           getAsNonOpaqueConstant(N1.getOperand(1))) {
4317         // We can only do this xform if we know that bits from X that are set in
4318         // C2 but not in C1 are already zero.  Likewise for Y.
4319         const APInt &LHSMask = N0O1C->getAPIntValue();
4320         const APInt &RHSMask = N1O1C->getAPIntValue();
4321 
4322         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4323             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4324           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4325                                   N0.getOperand(0), N1.getOperand(0));
4326           return DAG.getNode(ISD::AND, DL, VT, X,
4327                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4328         }
4329       }
4330     }
4331   }
4332 
4333   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4334   if (N0.getOpcode() == ISD::AND &&
4335       N1.getOpcode() == ISD::AND &&
4336       N0.getOperand(0) == N1.getOperand(0) &&
4337       // Don't increase # computations.
4338       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4339     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4340                             N0.getOperand(1), N1.getOperand(1));
4341     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4342   }
4343 
4344   return SDValue();
4345 }
4346 
/// Combine an (or N0, N1) node.
///
/// Applies, in order: trivial identities, vector-specific folds (including
/// merging two shuffles that each have one zero input), constant folding and
/// RHS canonicalization, the shared OR folds in visitORLike, bswap/rotate
/// pattern matching, reassociation, and demanded-bits simplification.
/// The order matters: later folds assume earlier canonicalizations ran.
/// \returns the replacement value, or an empty SDValue if nothing matched.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      // Record which input of each shuffle is the all-zeros vector.
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        // Build a combined mask: for each lane, exactly one shuffle must
        // contribute a zero and the other the real element.
        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          // The non-zero input of each shuffle becomes an operand of the
          // merged shuffle.
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            // Retry with the commuted mask before giving up.
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0.
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) {
    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) {
        if (SDValue COR =
                DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1))
          return DAG.getNode(
              ISD::AND, SDLoc(N), VT,
              DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
        // Constant fold failed: stop here rather than fall through.
        return SDValue();
      }
    }
  }

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // Attempt a load-combining fold (see MatchLoadCombine).
  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
4512 
4513 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4514 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4515   if (Op.getOpcode() == ISD::AND) {
4516     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4517       Mask = Op.getOperand(1);
4518       Op = Op.getOperand(0);
4519     } else {
4520       return false;
4521     }
4522   }
4523 
4524   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4525     Shift = Op;
4526     return true;
4527   }
4528 
4529   return false;
4530 }
4531 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0; // 0 means Mask is all-ones, i.e. condition [B].
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      if (NegC->getAPIntValue() == EltSize - 1) {
        // Neg is (and Neg', EltSize-1): switch to the weaker condition [A]
        // and strip the redundant mask.
        Neg = Neg.getOperand(0);
        MaskLoBits = Log2_64(EltSize);
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      if (PosC->getAPIntValue() == EltSize - 1)
        Pos = Pos.getOperand(0);

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
4634 
4635 // A subroutine of MatchRotate used once we have found an OR of two opposite
4636 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4637 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4638 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4639 // Neg with outer conversions stripped away.
4640 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4641                                        SDValue Neg, SDValue InnerPos,
4642                                        SDValue InnerNeg, unsigned PosOpcode,
4643                                        unsigned NegOpcode, const SDLoc &DL) {
4644   // fold (or (shl x, (*ext y)),
4645   //          (srl x, (*ext (sub 32, y)))) ->
4646   //   (rotl x, y) or (rotr x, (sub 32, y))
4647   //
4648   // fold (or (shl x, (*ext (sub 32, y))),
4649   //          (srl x, (*ext y))) ->
4650   //   (rotr x, y) or (rotl x, (sub 32, y))
4651   EVT VT = Shifted.getValueType();
4652   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4653     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4654     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4655                        HasPos ? Pos : Neg).getNode();
4656   }
4657 
4658   return nullptr;
4659 }
4660 
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // A pair of constant shift amounts forms a rotate iff they sum to the
  // element size.
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    // Each original mask only constrained the bits produced by its own
    // shift; the bit positions filled by the opposite shift were zero in
    // that operand, so widen each mask with those positions before ANDing
    // it onto the rotate.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  // matchRotateSub (via MatchRotatePosNeg) sees the stripped inner amounts,
  // while the rotate node is built with the original outer amounts.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try both orientations: treat the shl amount as "Pos" first, then the
  // srl amount.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
4768 
4769 namespace {
4770 /// Represents known origin of an individual byte in load combine pattern. The
4771 /// value of the byte is either constant zero or comes from memory.
4772 struct ByteProvider {
4773   // For constant zero providers Load is set to nullptr. For memory providers
4774   // Load represents the node which loads the byte from memory.
4775   // ByteOffset is the offset of the byte in the value produced by the load.
4776   LoadSDNode *Load;
4777   unsigned ByteOffset;
4778 
4779   ByteProvider() : Load(nullptr), ByteOffset(0) {}
4780 
4781   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
4782     return ByteProvider(Load, ByteOffset);
4783   }
4784   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
4785 
4786   bool isConstantZero() const { return !Load; }
4787   bool isMemory() const { return Load; }
4788 
4789   bool operator==(const ByteProvider &Other) const {
4790     return Other.Load == Load && Other.ByteOffset == ByteOffset;
4791   }
4792 
4793 private:
4794   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
4795       : Load(Load), ByteOffset(ByteOffset) {}
4796 };
4797 
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For all the values except the root of the expression verifies that the value
/// has exactly one use and if it's not true return None. This way if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
                                                   unsigned Depth,
                                                   bool Root = false) {
  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
  // (hard cap against pathological expression trees).
  if (Depth == 10)
    return None;

  // Only the root may have multiple uses; see the function comment.
  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    // A byte of an OR is known only when the matching byte of one operand
    // is known zero; the other operand then provides the byte.
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    // Only handle shifts by a constant whole number of bytes. A left shift
    // by ByteShift bytes zero-fills the low ByteShift bytes and moves byte
    // (Index - ByteShift) of the operand into position Index.
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes above the narrow value are known zero only for zero-extension;
    // sign/any extension leaves them unknown.
    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    // Byte swap mirrors the byte index within the value.
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    if (L->isVolatile() || L->isIndexed())
      return None;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes past the memory width are known zero only for zero-extending
    // loads; sign/any-extending loads leave them unknown.
    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  return None;
}
4895 } // namespace
4896 
4897 /// Match a pattern where a wide type scalar value is loaded by several narrow
4898 /// loads and combined by shifts and ors. Fold it into a single load or a load
4899 /// and a BSWAP if the targets supports it.
4900 ///
4901 /// Assuming little endian target:
4902 ///  i8 *a = ...
4903 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4904 /// =>
4905 ///  i32 val = *((i32)a)
4906 ///
4907 ///  i8 *a = ...
4908 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4909 /// =>
4910 ///  i32 val = BSWAP(*((i32)a))
4911 ///
4912 /// TODO: This rule matches complex patterns with OR node roots and doesn't
4913 /// interact well with the worklist mechanism. When a part of the pattern is
4914 /// updated (e.g. one of the loads) its direct users are put into the worklist,
4915 /// but the root node of the pattern which triggers the load combine is not
4916 /// necessarily a direct user of the changed node. For example, once the address
4917 /// of t28 load is reassociated load combine won't be triggered:
4918 ///             t25: i32 = add t4, Constant:i32<2>
4919 ///           t26: i64 = sign_extend t25
4920 ///        t27: i64 = add t2, t26
4921 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
4922 ///     t29: i32 = zero_extend t28
4923 ///   t32: i32 = shl t29, Constant:i8<8>
4924 /// t33: i32 = or t23, t32
4925 /// As a possible fix visitLoad can check if the load can be a part of a load
4926 /// combine pattern and add corresponding OR roots to the worklist.
4927 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
4928   assert(N->getOpcode() == ISD::OR &&
4929          "Can only match load combining against OR nodes");
4930 
4931   // Handles simple types only
4932   EVT VT = N->getValueType(0);
4933   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
4934     return SDValue();
4935   unsigned ByteWidth = VT.getSizeInBits() / 8;
4936 
4937   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4938   // Before legalize we can introduce too wide illegal loads which will be later
4939   // split into legal sized loads. This enables us to combine i64 load by i8
4940   // patterns to a couple of i32 loads on 32 bit targets.
4941   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
4942     return SDValue();
4943 
4944   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
4945     unsigned BW, unsigned i) { return i; };
4946   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
4947     unsigned BW, unsigned i) { return BW - i - 1; };
4948 
4949   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
4950   auto MemoryByteOffset = [&] (ByteProvider P) {
4951     assert(P.isMemory() && "Must be a memory byte provider");
4952     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
4953     assert(LoadBitWidth % 8 == 0 &&
4954            "can only analyze providers for individual bytes not bit");
4955     unsigned LoadByteWidth = LoadBitWidth / 8;
4956     return IsBigEndianTarget
4957             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
4958             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
4959   };
4960 
4961   Optional<BaseIndexOffset> Base;
4962   SDValue Chain;
4963 
4964   SmallSet<LoadSDNode *, 8> Loads;
4965   Optional<ByteProvider> FirstByteProvider;
4966   int64_t FirstOffset = INT64_MAX;
4967 
4968   // Check if all the bytes of the OR we are looking at are loaded from the same
4969   // base address. Collect bytes offsets from Base address in ByteOffsets.
4970   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
4971   for (unsigned i = 0; i < ByteWidth; i++) {
4972     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
4973     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
4974       return SDValue();
4975 
4976     LoadSDNode *L = P->Load;
4977     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
4978            "Must be enforced by calculateByteProvider");
4979     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
4980 
4981     // All loads must share the same chain
4982     SDValue LChain = L->getChain();
4983     if (!Chain)
4984       Chain = LChain;
4985     else if (Chain != LChain)
4986       return SDValue();
4987 
4988     // Loads must share the same base address
4989     BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
4990     int64_t ByteOffsetFromBase = 0;
4991     if (!Base)
4992       Base = Ptr;
4993     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
4994       return SDValue();
4995 
4996     // Calculate the offset of the current byte from the base address
4997     ByteOffsetFromBase += MemoryByteOffset(*P);
4998     ByteOffsets[i] = ByteOffsetFromBase;
4999 
5000     // Remember the first byte load
5001     if (ByteOffsetFromBase < FirstOffset) {
5002       FirstByteProvider = P;
5003       FirstOffset = ByteOffsetFromBase;
5004     }
5005 
5006     Loads.insert(L);
5007   }
5008   assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
5009          "memory, so there must be at least one load which produces the value");
5010   assert(Base && "Base address of the accessed memory location must be set");
5011   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5012 
5013   // Check if the bytes of the OR we are looking at match with either big or
5014   // little endian value load
5015   bool BigEndian = true, LittleEndian = true;
5016   for (unsigned i = 0; i < ByteWidth; i++) {
5017     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5018     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5019     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5020     if (!BigEndian && !LittleEndian)
5021       return SDValue();
5022   }
5023   assert((BigEndian != LittleEndian) && "should be either or");
5024   assert(FirstByteProvider && "must be set");
5025 
5026   // Ensure that the first byte is loaded from zero offset of the first load.
5027   // So the combined value can be loaded from the first load address.
5028   if (MemoryByteOffset(*FirstByteProvider) != 0)
5029     return SDValue();
5030   LoadSDNode *FirstLoad = FirstByteProvider->Load;
5031 
5032   // The node we are looking at matches with the pattern, check if we can
5033   // replace it with a single load and bswap if needed.
5034 
5035   // If the load needs byte swap check if the target supports it
5036   bool NeedsBswap = IsBigEndianTarget != BigEndian;
5037 
5038   // Before legalize we can introduce illegal bswaps which will be later
5039   // converted to an explicit bswap sequence. This way we end up with a single
5040   // load and byte shuffling instead of several loads and byte shuffling.
5041   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
5042     return SDValue();
5043 
5044   // Check that a load of the wide type is both allowed and fast on the target
5045   bool Fast = false;
5046   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
5047                                         VT, FirstLoad->getAddressSpace(),
5048                                         FirstLoad->getAlignment(), &Fast);
5049   if (!Allowed || !Fast)
5050     return SDValue();
5051 
5052   SDValue NewLoad =
5053       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
5054                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
5055 
5056   // Transfer chain users from old loads to the new load.
5057   for (LoadSDNode *L : Loads)
5058     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
5059 
5060   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
5061 }
5062 
/// Combine an XOR node: constant folding and canonicalization, setcc and
/// De Morgan style "not" inversions, and the shl/abs/rotl special cases
/// handled below. Returns the replacement value, or an empty SDValue.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    // Only invert when the inverted condition code is legal (or we are
    // pre-legalization and can still lower it).
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL(N0);
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
                    DAG.getConstant(1, DL, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      // De Morgan: swap AND<->OR and push the not onto both operands.
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      // De Morgan again; profitable since the not of a constant folds away.
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), DL, VT));
    }
    if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), DL, VT));
    }
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
      N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
      TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
      if (C->getAPIntValue() == (OpSizeInBits - 1))
        return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    SDLoc DL(N);
    // ~1 (int -2) is sign-extended by getConstant to the all-ones-except-LSB
    // pattern at VT's width.
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
5239 
5240 /// Handle transforms common to the three shifts, when the shift amount is a
5241 /// constant.
5242 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
5243   SDNode *LHS = N->getOperand(0).getNode();
5244   if (!LHS->hasOneUse()) return SDValue();
5245 
5246   // We want to pull some binops through shifts, so that we have (and (shift))
5247   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
5248   // thing happens with address calculations, so it's important to canonicalize
5249   // it.
5250   bool HighBitSet = false;  // Can we transform this if the high bit is set?
5251 
5252   switch (LHS->getOpcode()) {
5253   default: return SDValue();
5254   case ISD::OR:
5255   case ISD::XOR:
5256     HighBitSet = false; // We can only transform sra if the high bit is clear.
5257     break;
5258   case ISD::AND:
5259     HighBitSet = true;  // We can only transform sra if the high bit is set.
5260     break;
5261   case ISD::ADD:
5262     if (N->getOpcode() != ISD::SHL)
5263       return SDValue(); // only shl(add) not sr[al](add).
5264     HighBitSet = false; // We can only transform sra if the high bit is clear.
5265     break;
5266   }
5267 
5268   // We require the RHS of the binop to be a constant and not opaque as well.
5269   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
5270   if (!BinOpCst) return SDValue();
5271 
5272   // FIXME: disable this unless the input to the binop is a shift by a constant
5273   // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
5274   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
5275   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
5276                  BinOpLHSVal->getOpcode() == ISD::SRA ||
5277                  BinOpLHSVal->getOpcode() == ISD::SRL;
5278   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
5279                         BinOpLHSVal->getOpcode() == ISD::SELECT;
5280 
5281   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
5282       !isCopyOrSelect)
5283     return SDValue();
5284 
5285   if (isCopyOrSelect && N->hasOneUse())
5286     return SDValue();
5287 
5288   EVT VT = N->getValueType(0);
5289 
5290   // If this is a signed shift right, and the high bit is modified by the
5291   // logical operation, do not perform the transformation. The highBitSet
5292   // boolean indicates the value of the high bit of the constant which would
5293   // cause it to be modified for this operation.
5294   if (N->getOpcode() == ISD::SRA) {
5295     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
5296     if (BinOpRHSSignSet != HighBitSet)
5297       return SDValue();
5298   }
5299 
5300   if (!TLI.isDesirableToCommuteWithShift(LHS))
5301     return SDValue();
5302 
5303   // Fold the constants, shifting the binop RHS by the shift amount.
5304   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
5305                                N->getValueType(0),
5306                                LHS->getOperand(1), N->getOperand(1));
5307   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
5308 
5309   // Create the new shift.
5310   SDValue NewShift = DAG.getNode(N->getOpcode(),
5311                                  SDLoc(LHS->getOperand(0)),
5312                                  VT, LHS->getOperand(0), N->getOperand(1));
5313 
5314   // Create the new binop.
5315   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
5316 }
5317 
5318 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5319   assert(N->getOpcode() == ISD::TRUNCATE);
5320   assert(N->getOperand(0).getOpcode() == ISD::AND);
5321 
5322   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5323   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5324     SDValue N01 = N->getOperand(0).getOperand(1);
5325     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5326       SDLoc DL(N);
5327       EVT TruncVT = N->getValueType(0);
5328       SDValue N00 = N->getOperand(0).getOperand(0);
5329       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5330       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5331       AddToWorklist(Trunc00.getNode());
5332       AddToWorklist(Trunc01.getNode());
5333       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5334     }
5335   }
5336 
5337   return SDValue();
5338 }
5339 
// Combine a rotate node (ROTL/ROTR): drop zero rotates, normalize the amount
// modulo the bitwidth, distribute truncate through an AND-masked amount, and
// merge nested rotates with constant amounts.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullConstantOrNullSplatConstant(N1))
    return N0;

  // fold (rot x, c) -> (rot x, c % BitSize)
  if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    if (Cst->getAPIntValue().uge(Bitsize)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
      return DAG.getNode(N->getOpcode(), dl, VT, N0,
                         DAG.getConstant(RotAmt, dl, N1.getValueType()));
    }
  }

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      // Same direction: amounts add. Opposite directions: outer minus inner.
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift =
              DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        // NOTE(review): SREM of a negative difference (opposite-direction
        // rotates with inner amount > outer) yields a negative rotate amount;
        // confirm that consumers tolerate this, or that it cannot occur here.
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
            BitsizeC.getNode());
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}
5389 
// Combine a SHL node: constant folding, degenerate operands (zero, undef,
// oversized amounts), and a sequence of reassociation folds with inner
// shifts, extensions, add and mul. Each fold is documented inline; the folds
// are attempted in priority order and the first match wins.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        // Valid only when the setcc's boolean is all-zeros/all-ones, so
        // shifting the mask instead of the result preserves semantics.
        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstantOrNullSplatConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    // Widen both amounts by one extra (overflow) bit so c1+c2 cannot wrap.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // c2 >= #bits added by the extension: inner shifted-out bits are dead.
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N0);
        APInt Sum = c1 + c2;
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        // Equal amounts: the shl just restores bits the srl cleared, so it
        // can be performed in the narrow type before extending.
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask of the bits that survive the srl; shift it to match whichever
        // single residual shift replaces the pair.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  // Last resort: generic shift-through-binop reassociation.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}
5625 
// Combine an SRA node: constant folding, sign-bit reasoning (all-sign-bit
// inputs, conversion to SRL when the sign bit is known zero), sext_inreg
// formation, and reassociation with inner shifts and truncates.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1 >>s c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
  // fold (sra x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();

    // Widen both amounts by one extra (overflow) bit so c1+c2 cannot wrap.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    // Oversized combined amount: clamp to bitwidth-1 (replicates sign bit).
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
                         DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (srl (shl X, m), (sub (sub result_size, n), m))))
  // for result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        // SRL is sufficient: the truncate discards every bit where SRL and
        // SRA could differ.
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Last resort: generic shift-through-binop reassociation.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
5794 
5795 SDValue DAGCombiner::visitSRL(SDNode *N) {
5796   SDValue N0 = N->getOperand(0);
5797   SDValue N1 = N->getOperand(1);
5798   EVT VT = N0.getValueType();
5799   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5800 
5801   // fold vector ops
5802   if (VT.isVector())
5803     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5804       return FoldedVOp;
5805 
5806   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5807 
5808   // fold (srl c1, c2) -> c1 >>u c2
5809   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5810   if (N0C && N1C && !N1C->isOpaque())
5811     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
5812   // fold (srl 0, x) -> 0
5813   if (isNullConstantOrNullSplatConstant(N0))
5814     return N0;
5815   // fold (srl x, c >= size(x)) -> undef
5816   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5817   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
5818     return Val->getAPIntValue().uge(OpSizeInBits);
5819   };
5820   if (matchUnaryPredicate(N1, MatchShiftTooBig))
5821     return DAG.getUNDEF(VT);
5822   // fold (srl x, 0) -> x
5823   if (N1C && N1C->isNullValue())
5824     return N0;
5825 
5826   if (SDValue NewSel = foldBinOpIntoSelect(N))
5827     return NewSel;
5828 
5829   // if (srl x, c) is known to be zero, return 0
5830   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5831                                    APInt::getAllOnesValue(OpSizeInBits)))
5832     return DAG.getConstant(0, SDLoc(N), VT);
5833 
5834   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
5835   if (N0.getOpcode() == ISD::SRL) {
5836     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5837                                           ConstantSDNode *RHS) {
5838       APInt c1 = LHS->getAPIntValue();
5839       APInt c2 = RHS->getAPIntValue();
5840       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5841       return (c1 + c2).uge(OpSizeInBits);
5842     };
5843     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5844       return DAG.getConstant(0, SDLoc(N), VT);
5845 
5846     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5847                                        ConstantSDNode *RHS) {
5848       APInt c1 = LHS->getAPIntValue();
5849       APInt c2 = RHS->getAPIntValue();
5850       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5851       return (c1 + c2).ult(OpSizeInBits);
5852     };
5853     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5854       SDLoc DL(N);
5855       EVT ShiftVT = N1.getValueType();
5856       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5857       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
5858     }
5859   }
5860 
5861   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
5862   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
5863       N0.getOperand(0).getOpcode() == ISD::SRL) {
5864     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
5865       uint64_t c1 = N001C->getZExtValue();
5866       uint64_t c2 = N1C->getZExtValue();
5867       EVT InnerShiftVT = N0.getOperand(0).getValueType();
5868       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
5869       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5870       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
5871       if (c1 + OpSizeInBits == InnerShiftSize) {
5872         SDLoc DL(N0);
5873         if (c1 + c2 >= InnerShiftSize)
5874           return DAG.getConstant(0, DL, VT);
5875         return DAG.getNode(ISD::TRUNCATE, DL, VT,
5876                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
5877                                        N0.getOperand(0).getOperand(0),
5878                                        DAG.getConstant(c1 + c2, DL,
5879                                                        ShiftCountVT)));
5880       }
5881     }
5882   }
5883 
5884   // fold (srl (shl x, c), c) -> (and x, cst2)
5885   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
5886       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
5887     SDLoc DL(N);
5888     SDValue Mask =
5889         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
5890     AddToWorklist(Mask.getNode());
5891     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
5892   }
5893 
5894   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
5895   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5896     // Shifting in all undef bits?
5897     EVT SmallVT = N0.getOperand(0).getValueType();
5898     unsigned BitSize = SmallVT.getScalarSizeInBits();
5899     if (N1C->getZExtValue() >= BitSize)
5900       return DAG.getUNDEF(VT);
5901 
5902     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
5903       uint64_t ShiftAmt = N1C->getZExtValue();
5904       SDLoc DL0(N0);
5905       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
5906                                        N0.getOperand(0),
5907                           DAG.getConstant(ShiftAmt, DL0,
5908                                           getShiftAmountTy(SmallVT)));
5909       AddToWorklist(SmallShift.getNode());
5910       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
5911       SDLoc DL(N);
5912       return DAG.getNode(ISD::AND, DL, VT,
5913                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
5914                          DAG.getConstant(Mask, DL, VT));
5915     }
5916   }
5917 
5918   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
5919   // bit, which is unmodified by sra.
5920   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
5921     if (N0.getOpcode() == ISD::SRA)
5922       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
5923   }
5924 
5925   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
5926   if (N1C && N0.getOpcode() == ISD::CTLZ &&
5927       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
5928     KnownBits Known;
5929     DAG.computeKnownBits(N0.getOperand(0), Known);
5930 
5931     // If any of the input bits are KnownOne, then the input couldn't be all
5932     // zeros, thus the result of the srl will always be zero.
5933     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
5934 
    // If all of the bits input to the ctlz node are known to be zero, then
5936     // the result of the ctlz is "32" and the result of the shift is one.
5937     APInt UnknownBits = ~Known.Zero;
5938     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
5939 
5940     // Otherwise, check to see if there is exactly one bit input to the ctlz.
5941     if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits
5943       // could be set on input to the CTLZ node. If this bit is set, the SRL
5944       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
5945       // to an SRL/XOR pair, which is likely to simplify more.
5946       unsigned ShAmt = UnknownBits.countTrailingZeros();
5947       SDValue Op = N0.getOperand(0);
5948 
5949       if (ShAmt) {
5950         SDLoc DL(N0);
5951         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5952                   DAG.getConstant(ShAmt, DL,
5953                                   getShiftAmountTy(Op.getValueType())));
5954         AddToWorklist(Op.getNode());
5955       }
5956 
5957       SDLoc DL(N);
5958       return DAG.getNode(ISD::XOR, DL, VT,
5959                          Op, DAG.getConstant(1, DL, VT));
5960     }
5961   }
5962 
5963   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
5964   if (N1.getOpcode() == ISD::TRUNCATE &&
5965       N1.getOperand(0).getOpcode() == ISD::AND) {
5966     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5967       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
5968   }
5969 
5970   // fold operands of srl based on knowledge that the low bits are not
5971   // demanded.
5972   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5973     return SDValue(N, 0);
5974 
5975   if (N1C && !N1C->isOpaque())
5976     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
5977       return NewSRL;
5978 
5979   // Attempt to convert a srl of a load into a narrower zero-extending load.
5980   if (SDValue NarrowLoad = ReduceLoadWidth(N))
5981     return NarrowLoad;
5982 
5983   // Here is a common situation. We want to optimize:
5984   //
5985   //   %a = ...
5986   //   %b = and i32 %a, 2
5987   //   %c = srl i32 %b, 1
5988   //   brcond i32 %c ...
5989   //
5990   // into
5991   //
5992   //   %a = ...
5993   //   %b = and %a, 2
5994   //   %c = setcc eq %b, 0
5995   //   brcond %c ...
5996   //
  // However, when the source operand of SRL is optimized into AND, the SRL
5998   // itself may not be optimized further. Look for it and add the BRCOND into
5999   // the worklist.
6000   if (N->hasOneUse()) {
6001     SDNode *Use = *N->use_begin();
6002     if (Use->getOpcode() == ISD::BRCOND)
6003       AddToWorklist(Use);
6004     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
6006       Use = *Use->use_begin();
6007       if (Use->getOpcode() == ISD::BRCOND)
6008         AddToWorklist(Use);
6009     }
6010   }
6011 
6012   return SDValue();
6013 }
6014 
6015 SDValue DAGCombiner::visitABS(SDNode *N) {
6016   SDValue N0 = N->getOperand(0);
6017   EVT VT = N->getValueType(0);
6018 
6019   // fold (abs c1) -> c2
6020   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6021     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
6022   // fold (abs (abs x)) -> (abs x)
6023   if (N0.getOpcode() == ISD::ABS)
6024     return N0;
6025   // fold (abs x) -> x iff not-negative
6026   if (DAG.SignBitIsZero(N0))
6027     return N0;
6028   return SDValue();
6029 }
6030 
6031 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
6032   SDValue N0 = N->getOperand(0);
6033   EVT VT = N->getValueType(0);
6034 
6035   // fold (bswap c1) -> c2
6036   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6037     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
6038   // fold (bswap (bswap x)) -> x
6039   if (N0.getOpcode() == ISD::BSWAP)
6040     return N0->getOperand(0);
6041   return SDValue();
6042 }
6043 
6044 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
6045   SDValue N0 = N->getOperand(0);
6046   EVT VT = N->getValueType(0);
6047 
6048   // fold (bitreverse c1) -> c2
6049   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6050     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
6051   // fold (bitreverse (bitreverse x)) -> x
6052   if (N0.getOpcode() == ISD::BITREVERSE)
6053     return N0.getOperand(0);
6054   return SDValue();
6055 }
6056 
6057 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
6058   SDValue N0 = N->getOperand(0);
6059   EVT VT = N->getValueType(0);
6060 
6061   // fold (ctlz c1) -> c2
6062   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6063     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
6064   return SDValue();
6065 }
6066 
6067 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
6068   SDValue N0 = N->getOperand(0);
6069   EVT VT = N->getValueType(0);
6070 
6071   // fold (ctlz_zero_undef c1) -> c2
6072   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6073     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6074   return SDValue();
6075 }
6076 
6077 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
6078   SDValue N0 = N->getOperand(0);
6079   EVT VT = N->getValueType(0);
6080 
6081   // fold (cttz c1) -> c2
6082   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6083     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
6084   return SDValue();
6085 }
6086 
6087 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
6088   SDValue N0 = N->getOperand(0);
6089   EVT VT = N->getValueType(0);
6090 
6091   // fold (cttz_zero_undef c1) -> c2
6092   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6093     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6094   return SDValue();
6095 }
6096 
6097 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
6098   SDValue N0 = N->getOperand(0);
6099   EVT VT = N->getValueType(0);
6100 
6101   // fold (ctpop c1) -> c2
6102   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6103     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
6104   return SDValue();
6105 }
6106 
6107 
6108 /// \brief Generate Min/Max node
6109 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
6110                                    SDValue RHS, SDValue True, SDValue False,
6111                                    ISD::CondCode CC, const TargetLowering &TLI,
6112                                    SelectionDAG &DAG) {
6113   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
6114     return SDValue();
6115 
6116   switch (CC) {
6117   case ISD::SETOLT:
6118   case ISD::SETOLE:
6119   case ISD::SETLT:
6120   case ISD::SETLE:
6121   case ISD::SETULT:
6122   case ISD::SETULE: {
6123     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
6124     if (TLI.isOperationLegal(Opcode, VT))
6125       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6126     return SDValue();
6127   }
6128   case ISD::SETOGT:
6129   case ISD::SETOGE:
6130   case ISD::SETGT:
6131   case ISD::SETGE:
6132   case ISD::SETUGT:
6133   case ISD::SETUGE: {
6134     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
6135     if (TLI.isOperationLegal(Opcode, VT))
6136       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6137     return SDValue();
6138   }
6139   default:
6140     return SDValue();
6141   }
6142 }
6143 
/// Fold (select Cond, C1, C2), where C1 and C2 are integer constants, into
/// boolean math on Cond (zext/sext/xor/add) when that is clearly profitable.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);   // value selected when Cond is true
  SDValue N2 = N->getOperand(2);   // value selected when Cond is false
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  // Only integer selects are handled here.
  if (!VT.isInteger())
    return SDValue();

  // Both select results must be integer constants.
  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // For any constants that differ by 1, we can transform the select into an
    // extend and add. Use a target hook because some targets may prefer to
    // transform in the other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        // zext Cond contributes 1 when true, making the sum C1.
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        // sext Cond contributes -1 when true, making the sum C1.
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
    }

    // No profitable i1 fold applied; don't fall through to the generic case.
    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(false, true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(false, false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}
6237 
/// Combine a SELECT node: trivial folds, boolean-logic folds for i1 selects,
/// select-chain normalization, and setcc-based folds (min/max, SELECT_CC).
/// The folds are ordered; earlier, simpler folds take priority.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);   // condition
  SDValue N1 = N->getOperand(1);   // true value
  SDValue N2 = N->getOperand(2);   // false value
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;

  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ? N1 : N2;
  }

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  // Try the select-of-two-constants folds (zext/sext/xor/add forms).
  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      // Only keep this form if the target wants it or the inner select
      // already existed in the DAG (use_empty is false in that case).
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  // select (xor Cond, 1), X, Y -> select Cond, Y, X
  if (VT0 == MVT::i1) {
    if (N0->getOpcode() == ISD::XOR) {
      if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
        SDValue Cond0 = N0->getOperand(0);
        // xor with 1 on an i1 value is logical-not, so swap the select arms.
        if (C->isOne())
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
      }
    }
  }

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      if (SDValue FMinMax = combineMinNumMaxNum(
              DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    // Prefer a SELECT_CC node when the target supports it for this type.
    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
                         N0.getOperand(1), N1, N2, N0.getOperand(2));
    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
6396 
6397 static
6398 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
6399   SDLoc DL(N);
6400   EVT LoVT, HiVT;
6401   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
6402 
6403   // Split the inputs.
6404   SDValue Lo, Hi, LL, LH, RL, RH;
6405   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
6406   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
6407 
6408   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
6409   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
6410 
6411   return std::make_pair(Lo, Hi);
6412 }
6413 
6414 // This function assumes all the vselect's arguments are CONCAT_VECTOR
6415 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
6416 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
6417   SDLoc DL(N);
6418   SDValue Cond = N->getOperand(0);
6419   SDValue LHS = N->getOperand(1);
6420   SDValue RHS = N->getOperand(2);
6421   EVT VT = N->getValueType(0);
6422   int NumElems = VT.getVectorNumElements();
6423   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
6424          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
6425          Cond.getOpcode() == ISD::BUILD_VECTOR);
6426 
6427   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
6428   // binary ones here.
6429   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
6430     return SDValue();
6431 
6432   // We're sure we have an even number of elements due to the
6433   // concat_vectors we have as arguments to vselect.
6434   // Skip BV elements until we find one that's not an UNDEF
6435   // After we find an UNDEF element, keep looping until we get to half the
6436   // length of the BV and see if all the non-undef nodes are the same.
6437   ConstantSDNode *BottomHalf = nullptr;
6438   for (int i = 0; i < NumElems / 2; ++i) {
6439     if (Cond->getOperand(i)->isUndef())
6440       continue;
6441 
6442     if (BottomHalf == nullptr)
6443       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6444     else if (Cond->getOperand(i).getNode() != BottomHalf)
6445       return SDValue();
6446   }
6447 
6448   // Do the same for the second half of the BuildVector
6449   ConstantSDNode *TopHalf = nullptr;
6450   for (int i = NumElems / 2; i < NumElems; ++i) {
6451     if (Cond->getOperand(i)->isUndef())
6452       continue;
6453 
6454     if (TopHalf == nullptr)
6455       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6456     else if (Cond->getOperand(i).getNode() != TopHalf)
6457       return SDValue();
6458   }
6459 
6460   assert(TopHalf && BottomHalf &&
6461          "One half of the selector was all UNDEFs and the other was all the "
6462          "same value. This should have been addressed before this function.");
6463   return DAG.getNode(
6464       ISD::CONCAT_VECTORS, DL, VT,
6465       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
6466       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
6467 }
6468 
/// Split a masked scatter whose data type needs type-splitting when its mask
/// comes from a SETCC, so the SETCC is split too instead of being unrolled.
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {

  // Only worthwhile before type legalization.
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Data  = MSC->getValue();
  SDLoc DL(N);

  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();
  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  // Both halves scatter from the same base pointer; only the index vector and
  // data/mask are split.
  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  // NOTE(review): one MMO (sized from LoMemVT) is shared by both half
  // scatters, so the Hi half reuses the Lo half's store size — presumably
  // acceptable for scatters since addressing is per-element; confirm.
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MSC->getPointerInfo(),
                          MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
                          Alignment, MSC->getAAInfo(), MSC->getRanges());

  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
  Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
                            DL, OpsLo, MMO);

  SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
  Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
                            DL, OpsHi, MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Join the two independent half-scatters' chains.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
6529 
6530 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
6531 
6532   if (Level >= AfterLegalizeTypes)
6533     return SDValue();
6534 
6535   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
6536   SDValue Mask = MST->getMask();
6537   SDValue Data  = MST->getValue();
6538   EVT VT = Data.getValueType();
6539   SDLoc DL(N);
6540 
6541   // If the MSTORE data type requires splitting and the mask is provided by a
6542   // SETCC, then split both nodes and its operands before legalization. This
6543   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6544   // and enables future optimizations (e.g. min/max pattern matching on X86).
6545   if (Mask.getOpcode() == ISD::SETCC) {
6546 
6547     // Check if any splitting is required.
6548     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6549         TargetLowering::TypeSplitVector)
6550       return SDValue();
6551 
6552     SDValue MaskLo, MaskHi, Lo, Hi;
6553     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6554 
6555     SDValue Chain = MST->getChain();
6556     SDValue Ptr   = MST->getBasePtr();
6557 
6558     EVT MemoryVT = MST->getMemoryVT();
6559     unsigned Alignment = MST->getOriginalAlignment();
6560 
6561     // if Alignment is equal to the vector size,
6562     // take the half of it for the second part
6563     unsigned SecondHalfAlignment =
6564       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
6565 
6566     EVT LoMemVT, HiMemVT;
6567     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6568 
6569     SDValue DataLo, DataHi;
6570     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6571 
6572     MachineMemOperand *MMO = DAG.getMachineFunction().
6573       getMachineMemOperand(MST->getPointerInfo(),
6574                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6575                            Alignment, MST->getAAInfo(), MST->getRanges());
6576 
6577     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
6578                             MST->isTruncatingStore(),
6579                             MST->isCompressingStore());
6580 
6581     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6582                                      MST->isCompressingStore());
6583 
6584     MMO = DAG.getMachineFunction().
6585       getMachineMemOperand(MST->getPointerInfo(),
6586                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
6587                            SecondHalfAlignment, MST->getAAInfo(),
6588                            MST->getRanges());
6589 
6590     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
6591                             MST->isTruncatingStore(),
6592                             MST->isCompressingStore());
6593 
6594     AddToWorklist(Lo.getNode());
6595     AddToWorklist(Hi.getNode());
6596 
6597     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6598   }
6599   return SDValue();
6600 }
6601 
6602 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
6603 
6604   if (Level >= AfterLegalizeTypes)
6605     return SDValue();
6606 
6607   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
6608   SDValue Mask = MGT->getMask();
6609   SDLoc DL(N);
6610 
6611   // If the MGATHER result requires splitting and the mask is provided by a
6612   // SETCC, then split both nodes and its operands before legalization. This
6613   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6614   // and enables future optimizations (e.g. min/max pattern matching on X86).
6615 
6616   if (Mask.getOpcode() != ISD::SETCC)
6617     return SDValue();
6618 
6619   EVT VT = N->getValueType(0);
6620 
6621   // Check if any splitting is required.
6622   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6623       TargetLowering::TypeSplitVector)
6624     return SDValue();
6625 
6626   SDValue MaskLo, MaskHi, Lo, Hi;
6627   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6628 
6629   SDValue Src0 = MGT->getValue();
6630   SDValue Src0Lo, Src0Hi;
6631   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6632 
6633   EVT LoVT, HiVT;
6634   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
6635 
6636   SDValue Chain = MGT->getChain();
6637   EVT MemoryVT = MGT->getMemoryVT();
6638   unsigned Alignment = MGT->getOriginalAlignment();
6639 
6640   EVT LoMemVT, HiMemVT;
6641   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6642 
6643   SDValue BasePtr = MGT->getBasePtr();
6644   SDValue Index = MGT->getIndex();
6645   SDValue IndexLo, IndexHi;
6646   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
6647 
6648   MachineMemOperand *MMO = DAG.getMachineFunction().
6649     getMachineMemOperand(MGT->getPointerInfo(),
6650                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6651                           Alignment, MGT->getAAInfo(), MGT->getRanges());
6652 
6653   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
6654   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
6655                             MMO);
6656 
6657   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
6658   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
6659                             MMO);
6660 
6661   AddToWorklist(Lo.getNode());
6662   AddToWorklist(Hi.getNode());
6663 
6664   // Build a factor node to remember that this load is independent of the
6665   // other one.
6666   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6667                       Hi.getValue(1));
6668 
6669   // Legalized the chain result - switch anything that used the old chain to
6670   // use the new one.
6671   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
6672 
6673   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6674 
6675   SDValue RetOps[] = { GatherRes, Chain };
6676   return DAG.getMergeValues(RetOps, DL);
6677 }
6678 
6679 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
6680 
6681   if (Level >= AfterLegalizeTypes)
6682     return SDValue();
6683 
6684   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
6685   SDValue Mask = MLD->getMask();
6686   SDLoc DL(N);
6687 
6688   // If the MLOAD result requires splitting and the mask is provided by a
6689   // SETCC, then split both nodes and its operands before legalization. This
6690   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6691   // and enables future optimizations (e.g. min/max pattern matching on X86).
6692 
6693   if (Mask.getOpcode() == ISD::SETCC) {
6694     EVT VT = N->getValueType(0);
6695 
6696     // Check if any splitting is required.
6697     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6698         TargetLowering::TypeSplitVector)
6699       return SDValue();
6700 
6701     SDValue MaskLo, MaskHi, Lo, Hi;
6702     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6703 
6704     SDValue Src0 = MLD->getSrc0();
6705     SDValue Src0Lo, Src0Hi;
6706     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6707 
6708     EVT LoVT, HiVT;
6709     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
6710 
6711     SDValue Chain = MLD->getChain();
6712     SDValue Ptr   = MLD->getBasePtr();
6713     EVT MemoryVT = MLD->getMemoryVT();
6714     unsigned Alignment = MLD->getOriginalAlignment();
6715 
6716     // if Alignment is equal to the vector size,
6717     // take the half of it for the second part
6718     unsigned SecondHalfAlignment =
6719       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
6720          Alignment/2 : Alignment;
6721 
6722     EVT LoMemVT, HiMemVT;
6723     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6724 
6725     MachineMemOperand *MMO = DAG.getMachineFunction().
6726     getMachineMemOperand(MLD->getPointerInfo(),
6727                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6728                          Alignment, MLD->getAAInfo(), MLD->getRanges());
6729 
6730     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
6731                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6732 
6733     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6734                                      MLD->isExpandingLoad());
6735 
6736     MMO = DAG.getMachineFunction().
6737     getMachineMemOperand(MLD->getPointerInfo(),
6738                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
6739                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
6740 
6741     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
6742                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6743 
6744     AddToWorklist(Lo.getNode());
6745     AddToWorklist(Hi.getNode());
6746 
6747     // Build a factor node to remember that this load is independent of the
6748     // other one.
6749     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6750                         Hi.getValue(1));
6751 
6752     // Legalized the chain result - switch anything that used the old chain to
6753     // use the new one.
6754     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
6755 
6756     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6757 
6758     SDValue RetOps[] = { LoadRes, Chain };
6759     return DAG.getMergeValues(RetOps, DL);
6760   }
6761   return SDValue();
6762 }
6763 
6764 /// A vector select of 2 constant vectors can be simplified to math/logic to
6765 /// avoid a variable select instruction and possibly avoid constant loads.
6766 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
6767   SDValue Cond = N->getOperand(0);
6768   SDValue N1 = N->getOperand(1);
6769   SDValue N2 = N->getOperand(2);
6770   EVT VT = N->getValueType(0);
6771   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
6772       !TLI.convertSelectOfConstantsToMath(VT) ||
6773       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
6774       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
6775     return SDValue();
6776 
6777   // Check if we can use the condition value to increment/decrement a single
6778   // constant value. This simplifies a select to an add and removes a constant
6779   // load/materialization from the general case.
6780   bool AllAddOne = true;
6781   bool AllSubOne = true;
6782   unsigned Elts = VT.getVectorNumElements();
6783   for (unsigned i = 0; i != Elts; ++i) {
6784     SDValue N1Elt = N1.getOperand(i);
6785     SDValue N2Elt = N2.getOperand(i);
6786     if (N1Elt.isUndef() || N2Elt.isUndef())
6787       continue;
6788 
6789     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
6790     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
6791     if (C1 != C2 + 1)
6792       AllAddOne = false;
6793     if (C1 != C2 - 1)
6794       AllSubOne = false;
6795   }
6796 
6797   // Further simplifications for the extra-special cases where the constants are
6798   // all 0 or all -1 should be implemented as folds of these patterns.
6799   SDLoc DL(N);
6800   if (AllAddOne || AllSubOne) {
6801     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
6802     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
6803     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
6804     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
6805     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
6806   }
6807 
6808   // The general case for select-of-constants:
6809   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
6810   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
6811   // leave that to a machine-specific pass.
6812   return SDValue();
6813 }
6814 
// Combine a VSELECT node: tries trivial folds, integer-abs canonicalization,
// select-operand simplification, concat-vector conversion, and
// constant-arm math folds, in that order.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDLoc DL(N);

  // fold (vselect C, X, X) -> X
  if (N1 == N2)
    return N1;

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // Match "X > 0 (or >= 0, or > -1) ? X : 0-X" ...
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    // ... or the mirrored "X < 0 (or <= 0) ? 0-X : X" form.
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      // Prefer a native ABS node when the target supports it.
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      // Otherwise expand: Y = sra(X, bits-1); abs = xor(add(X, Y), Y).
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}
6884 
6885 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
6886   SDValue N0 = N->getOperand(0);
6887   SDValue N1 = N->getOperand(1);
6888   SDValue N2 = N->getOperand(2);
6889   SDValue N3 = N->getOperand(3);
6890   SDValue N4 = N->getOperand(4);
6891   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
6892 
6893   // fold select_cc lhs, rhs, x, x, cc -> x
6894   if (N2 == N3)
6895     return N2;
6896 
6897   // Determine if the condition we're dealing with is constant
6898   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
6899                                   CC, SDLoc(N), false)) {
6900     AddToWorklist(SCC.getNode());
6901 
6902     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
6903       if (!SCCC->isNullValue())
6904         return N2;    // cond always true -> true val
6905       else
6906         return N3;    // cond always false -> false val
6907     } else if (SCC->isUndef()) {
6908       // When the condition is UNDEF, just return the first operand. This is
6909       // coherent the DAG creation, no setcc node is created in this case
6910       return N2;
6911     } else if (SCC.getOpcode() == ISD::SETCC) {
6912       // Fold to a simpler select_cc
6913       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
6914                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
6915                          SCC.getOperand(2));
6916     }
6917   }
6918 
6919   // If we can fold this based on the true/false value, do so.
6920   if (SimplifySelectOps(N, N2, N3))
6921     return SDValue(N, 0);  // Don't revisit N.
6922 
6923   // fold select_cc into other things, such as min/max/abs
6924   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
6925 }
6926 
6927 SDValue DAGCombiner::visitSETCC(SDNode *N) {
6928   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
6929                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
6930                        SDLoc(N));
6931 }
6932 
6933 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
6934   SDValue LHS = N->getOperand(0);
6935   SDValue RHS = N->getOperand(1);
6936   SDValue Carry = N->getOperand(2);
6937   SDValue Cond = N->getOperand(3);
6938 
6939   // If Carry is false, fold to a regular SETCC.
6940   if (Carry.getOpcode() == ISD::CARRY_FALSE)
6941     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6942 
6943   return SDValue();
6944 }
6945 
6946 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
6947   SDValue LHS = N->getOperand(0);
6948   SDValue RHS = N->getOperand(1);
6949   SDValue Carry = N->getOperand(2);
6950   SDValue Cond = N->getOperand(3);
6951 
6952   // If Carry is false, fold to a regular SETCC.
6953   if (isNullConstant(Carry))
6954     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6955 
6956   return SDValue();
6957 }
6958 
6959 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
6960 /// a build_vector of constants.
6961 /// This function is called by the DAGCombiner when visiting sext/zext/aext
6962 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
6963 /// Vector extends are not folded if operations are legal; this is to
6964 /// avoid introducing illegal build_vector dag nodes.
6965 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
6966                                          SelectionDAG &DAG, bool LegalTypes,
6967                                          bool LegalOperations) {
6968   unsigned Opcode = N->getOpcode();
6969   SDValue N0 = N->getOperand(0);
6970   EVT VT = N->getValueType(0);
6971 
6972   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
6973          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6974          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
6975          && "Expected EXTEND dag node in input!");
6976 
6977   // fold (sext c1) -> c1
6978   // fold (zext c1) -> c1
6979   // fold (aext c1) -> c1
6980   if (isa<ConstantSDNode>(N0))
6981     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
6982 
6983   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
6984   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
6985   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
6986   EVT SVT = VT.getScalarType();
6987   if (!(VT.isVector() &&
6988       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
6989       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
6990     return nullptr;
6991 
6992   // We can fold this node into a build_vector.
6993   unsigned VTBits = SVT.getSizeInBits();
6994   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
6995   SmallVector<SDValue, 8> Elts;
6996   unsigned NumElts = VT.getVectorNumElements();
6997   SDLoc DL(N);
6998 
6999   for (unsigned i=0; i != NumElts; ++i) {
7000     SDValue Op = N0->getOperand(i);
7001     if (Op->isUndef()) {
7002       Elts.push_back(DAG.getUNDEF(SVT));
7003       continue;
7004     }
7005 
7006     SDLoc DL(Op);
7007     // Get the constant value and if needed trunc it to the size of the type.
7008     // Nodes like build_vector might have constants wider than the scalar type.
7009     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
7010     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
7011       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
7012     else
7013       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
7014   }
7015 
7016   return DAG.getBuildVector(VT, DL, Elts).getNode();
7017 }
7018 
7019 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
7020 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
7021 // transformation. Returns true if extension are possible and the above
7022 // mentioned transformation is profitable.
7023 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
7024                                     unsigned ExtOpc,
7025                                     SmallVectorImpl<SDNode *> &ExtendNodes,
7026                                     const TargetLowering &TLI) {
7027   bool HasCopyToRegUses = false;
7028   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
7029   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
7030                             UE = N0.getNode()->use_end();
7031        UI != UE; ++UI) {
7032     SDNode *User = *UI;
7033     if (User == N)
7034       continue;
7035     if (UI.getUse().getResNo() != N0.getResNo())
7036       continue;
7037     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
7038     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
7039       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
7040       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
7041         // Sign bits will be lost after a zext.
7042         return false;
7043       bool Add = false;
7044       for (unsigned i = 0; i != 2; ++i) {
7045         SDValue UseOp = User->getOperand(i);
7046         if (UseOp == N0)
7047           continue;
7048         if (!isa<ConstantSDNode>(UseOp))
7049           return false;
7050         Add = true;
7051       }
7052       if (Add)
7053         ExtendNodes.push_back(User);
7054       continue;
7055     }
7056     // If truncates aren't free and there are users we can't
7057     // extend, it isn't worthwhile.
7058     if (!isTruncFree)
7059       return false;
7060     // Remember if this value is live-out.
7061     if (User->getOpcode() == ISD::CopyToReg)
7062       HasCopyToRegUses = true;
7063   }
7064 
7065   if (HasCopyToRegUses) {
7066     bool BothLiveOut = false;
7067     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
7068          UI != UE; ++UI) {
7069       SDUse &Use = UI.getUse();
7070       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
7071         BothLiveOut = true;
7072         break;
7073       }
7074     }
7075     if (BothLiveOut)
7076       // Both unextended and extended values are live out. There had better be
7077       // a good reason for the transformation.
7078       return ExtendNodes.size();
7079   }
7080   return true;
7081 }
7082 
7083 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
7084                                   SDValue Trunc, SDValue ExtLoad,
7085                                   const SDLoc &DL, ISD::NodeType ExtType) {
7086   // Extend SetCC uses if necessary.
7087   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
7088     SDNode *SetCC = SetCCs[i];
7089     SmallVector<SDValue, 4> Ops;
7090 
7091     for (unsigned j = 0; j != 2; ++j) {
7092       SDValue SOp = SetCC->getOperand(j);
7093       if (SOp == Trunc)
7094         Ops.push_back(ExtLoad);
7095       else
7096         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
7097     }
7098 
7099     Ops.push_back(SetCC->getOperand(2));
7100     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
7101   }
7102 }
7103 
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
// Split a sext/zext of an illegal-but-splittable vector load into several
// smaller legal extloads, concatenated back together.
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Only simple, unindexed, non-volatile loads of power-of-2 vectors the
  // target explicitly wants split are eligible.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // Collect SETCC users that would need extending; bail if any user blocks
  // the transform.
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  // Halve both types until the extload is legal/custom or can't split further.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  // Byte distance between consecutive split loads in memory.
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  // Emit one extload per split, advancing the pointer by Stride each time.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    // Alignment of each piece can't exceed what its offset guarantees.
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Merge the chains of the split loads and concatenate their values.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  CombineTo(N0.getNode(), Trunc, NewChain);
  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
                  (ISD::NodeType)N->getOpcode());
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
7201 
7202 /// If we're narrowing or widening the result of a vector select and the final
7203 /// size is the same size as a setcc (compare) feeding the select, then try to
7204 /// apply the cast operation to the select's operands because matching vector
7205 /// sizes for a select condition and other operands should be more efficient.
7206 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
7207   unsigned CastOpcode = Cast->getOpcode();
7208   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
7209           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
7210           CastOpcode == ISD::FP_ROUND) &&
7211          "Unexpected opcode for vector select narrowing/widening");
7212 
7213   // We only do this transform before legal ops because the pattern may be
7214   // obfuscated by target-specific operations after legalization. Do not create
7215   // an illegal select op, however, because that may be difficult to lower.
7216   EVT VT = Cast->getValueType(0);
7217   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
7218     return SDValue();
7219 
7220   SDValue VSel = Cast->getOperand(0);
7221   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
7222       VSel.getOperand(0).getOpcode() != ISD::SETCC)
7223     return SDValue();
7224 
7225   // Does the setcc have the same vector size as the casted select?
7226   SDValue SetCC = VSel.getOperand(0);
7227   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
7228   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
7229     return SDValue();
7230 
7231   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
7232   SDValue A = VSel.getOperand(1);
7233   SDValue B = VSel.getOperand(2);
7234   SDValue CastA, CastB;
7235   SDLoc DL(Cast);
7236   if (CastOpcode == ISD::FP_ROUND) {
7237     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
7238     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
7239     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
7240   } else {
7241     CastA = DAG.getNode(CastOpcode, DL, VT, A);
7242     CastB = DAG.getNode(CastOpcode, DL, VT, B);
7243   }
7244   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
7245 }
7246 
7247 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
7248   SDValue N0 = N->getOperand(0);
7249   EVT VT = N->getValueType(0);
7250   SDLoc DL(N);
7251 
7252   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7253                                               LegalOperations))
7254     return SDValue(Res, 0);
7255 
7256   // fold (sext (sext x)) -> (sext x)
7257   // fold (sext (aext x)) -> (sext x)
7258   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7259     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
7260 
7261   if (N0.getOpcode() == ISD::TRUNCATE) {
7262     // fold (sext (truncate (load x))) -> (sext (smaller load x))
7263     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
7264     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7265       SDNode *oye = N0.getOperand(0).getNode();
7266       if (NarrowLoad.getNode() != N0.getNode()) {
7267         CombineTo(N0.getNode(), NarrowLoad);
7268         // CombineTo deleted the truncate, if needed, but not what's under it.
7269         AddToWorklist(oye);
7270       }
7271       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7272     }
7273 
7274     // See if the value being truncated is already sign extended.  If so, just
7275     // eliminate the trunc/sext pair.
7276     SDValue Op = N0.getOperand(0);
7277     unsigned OpBits   = Op.getScalarValueSizeInBits();
7278     unsigned MidBits  = N0.getScalarValueSizeInBits();
7279     unsigned DestBits = VT.getScalarSizeInBits();
7280     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
7281 
7282     if (OpBits == DestBits) {
7283       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
7284       // bits, it is already ready.
7285       if (NumSignBits > DestBits-MidBits)
7286         return Op;
7287     } else if (OpBits < DestBits) {
7288       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
7289       // bits, just sext from i32.
7290       if (NumSignBits > OpBits-MidBits)
7291         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
7292     } else {
7293       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
7294       // bits, just truncate to i32.
7295       if (NumSignBits > OpBits-MidBits)
7296         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7297     }
7298 
7299     // fold (sext (truncate x)) -> (sextinreg x).
7300     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
7301                                                  N0.getValueType())) {
7302       if (OpBits < DestBits)
7303         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
7304       else if (OpBits > DestBits)
7305         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
7306       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7307                          DAG.getValueType(N0.getValueType()));
7308     }
7309   }
7310 
7311   // fold (sext (load x)) -> (sext (truncate (sextload x)))
7312   // Only generate vector extloads when 1) they're legal, and 2) they are
7313   // deemed desirable by the target.
7314   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7315       ((!LegalOperations && !VT.isVector() &&
7316         !cast<LoadSDNode>(N0)->isVolatile()) ||
7317        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
7318     bool DoXform = true;
7319     SmallVector<SDNode*, 4> SetCCs;
7320     if (!N0.hasOneUse())
7321       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
7322     if (VT.isVector())
7323       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7324     if (DoXform) {
7325       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7326       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7327                                        LN0->getBasePtr(), N0.getValueType(),
7328                                        LN0->getMemOperand());
7329       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7330                                   N0.getValueType(), ExtLoad);
7331       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
7332       // If the load value is used only by N, replace it via CombineTo N.
7333       bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7334       CombineTo(N, ExtLoad);
7335       if (NoReplaceTrunc)
7336         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7337       else
7338         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7339       return SDValue(N, 0);
7340     }
7341   }
7342 
7343   // fold (sext (load x)) to multiple smaller sextloads.
7344   // Only on illegal but splittable vectors.
7345   if (SDValue ExtLoad = CombineExtLoad(N))
7346     return ExtLoad;
7347 
7348   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
7349   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
7350   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7351       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7352     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7353     EVT MemVT = LN0->getMemoryVT();
7354     if ((!LegalOperations && !LN0->isVolatile()) ||
7355         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
7356       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7357                                        LN0->getBasePtr(), MemVT,
7358                                        LN0->getMemOperand());
7359       CombineTo(N, ExtLoad);
7360       CombineTo(N0.getNode(),
7361                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7362                             N0.getValueType(), ExtLoad),
7363                 ExtLoad.getValue(1));
7364       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7365     }
7366   }
7367 
7368   // fold (sext (and/or/xor (load x), cst)) ->
7369   //      (and/or/xor (sextload x), (sext cst))
7370   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7371        N0.getOpcode() == ISD::XOR) &&
7372       isa<LoadSDNode>(N0.getOperand(0)) &&
7373       N0.getOperand(1).getOpcode() == ISD::Constant &&
7374       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
7375       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7376     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7377     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
7378       bool DoXform = true;
7379       SmallVector<SDNode*, 4> SetCCs;
7380       if (!N0.hasOneUse())
7381         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
7382                                           SetCCs, TLI);
7383       if (DoXform) {
7384         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
7385                                          LN0->getChain(), LN0->getBasePtr(),
7386                                          LN0->getMemoryVT(),
7387                                          LN0->getMemOperand());
7388         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7389         Mask = Mask.sext(VT.getSizeInBits());
7390         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7391                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7392         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7393                                     SDLoc(N0.getOperand(0)),
7394                                     N0.getOperand(0).getValueType(), ExtLoad);
7395         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
7396         bool NoReplaceTruncAnd = !N0.hasOneUse();
7397         bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7398         CombineTo(N, And);
7399         // If N0 has multiple uses, change other uses as well.
7400         if (NoReplaceTruncAnd) {
7401           SDValue TruncAnd =
7402               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
7403           CombineTo(N0.getNode(), TruncAnd);
7404         }
7405         if (NoReplaceTrunc)
7406           DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7407         else
7408           CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7409         return SDValue(N,0); // Return N so it doesn't get rechecked!
7410       }
7411     }
7412   }
7413 
7414   if (N0.getOpcode() == ISD::SETCC) {
7415     SDValue N00 = N0.getOperand(0);
7416     SDValue N01 = N0.getOperand(1);
7417     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7418     EVT N00VT = N0.getOperand(0).getValueType();
7419 
7420     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
7421     // Only do this before legalize for now.
7422     if (VT.isVector() && !LegalOperations &&
7423         TLI.getBooleanContents(N00VT) ==
7424             TargetLowering::ZeroOrNegativeOneBooleanContent) {
7425       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
7426       // of the same size as the compared operands. Only optimize sext(setcc())
7427       // if this is the case.
7428       EVT SVT = getSetCCResultType(N00VT);
7429 
7430       // We know that the # elements of the results is the same as the
7431       // # elements of the compare (and the # elements of the compare result
7432       // for that matter).  Check to see that they are the same size.  If so,
7433       // we know that the element size of the sext'd result matches the
7434       // element size of the compare operands.
7435       if (VT.getSizeInBits() == SVT.getSizeInBits())
7436         return DAG.getSetCC(DL, VT, N00, N01, CC);
7437 
7438       // If the desired elements are smaller or larger than the source
7439       // elements, we can use a matching integer vector type and then
7440       // truncate/sign extend.
7441       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
7442       if (SVT == MatchingVecType) {
7443         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
7444         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
7445       }
7446     }
7447 
7448     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
7449     // Here, T can be 1 or -1, depending on the type of the setcc and
7450     // getBooleanContents().
7451     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
7452 
7453     // To determine the "true" side of the select, we need to know the high bit
7454     // of the value returned by the setcc if it evaluates to true.
7455     // If the type of the setcc is i1, then the true case of the select is just
7456     // sext(i1 1), that is, -1.
7457     // If the type of the setcc is larger (say, i8) then the value of the high
7458     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
7459     // of the appropriate width.
7460     SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
7461                                            : TLI.getConstTrueVal(DAG, VT, DL);
7462     SDValue Zero = DAG.getConstant(0, DL, VT);
7463     if (SDValue SCC =
7464             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
7465       return SCC;
7466 
7467     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
7468       EVT SetCCVT = getSetCCResultType(N00VT);
7469       // Don't do this transform for i1 because there's a select transform
7470       // that would reverse it.
7471       // TODO: We should not do this transform at all without a target hook
7472       // because a sext is likely cheaper than a select?
7473       if (SetCCVT.getScalarSizeInBits() != 1 &&
7474           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
7475         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
7476         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
7477       }
7478     }
7479   }
7480 
7481   // fold (sext x) -> (zext x) if the sign bit is known zero.
7482   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
7483       DAG.SignBitIsZero(N0))
7484     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
7485 
7486   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7487     return NewVSel;
7488 
7489   return SDValue();
7490 }
7491 
7492 // isTruncateOf - If N is a truncate of some other value, return true, record
7493 // the value being truncated in Op and which of Op's bits are zero/one in Known.
7494 // This function computes KnownBits to avoid a duplicated call to
7495 // computeKnownBits in the caller.
7496 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
7497                          KnownBits &Known) {
7498   if (N->getOpcode() == ISD::TRUNCATE) {
7499     Op = N->getOperand(0);
7500     DAG.computeKnownBits(Op, Known);
7501     return true;
7502   }
7503 
7504   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
7505       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
7506     return false;
7507 
7508   SDValue Op0 = N->getOperand(0);
7509   SDValue Op1 = N->getOperand(1);
7510   assert(Op0.getValueType() == Op1.getValueType());
7511 
7512   if (isNullConstant(Op0))
7513     Op = Op1;
7514   else if (isNullConstant(Op1))
7515     Op = Op0;
7516   else
7517     return false;
7518 
7519   DAG.computeKnownBits(Op, Known);
7520 
7521   if (!(Known.Zero | 1).isAllOnesValue())
7522     return false;
7523 
7524   return true;
7525 }
7526 
// Simplify a ZERO_EXTEND node: fold constants, collapse nested extensions,
// turn (zext (trunc x)) into masks or narrower loads, form zero-extending
// loads, and lower zext(setcc) patterns.
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  KnownBits Known;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
    // TruncatedBits are the bits of Op that the truncate N0 discards but
    // that would still land inside VT after the zext; the fold is only safe
    // when all of them are known zero. If Op and N0 have the same width the
    // truncate drops nothing, hence the all-zero mask.
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate (load x))) -> (zext (smaller load x))
  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    // NOTE(review): this ReduceLoadWidth attempt duplicates the block
    // immediately above and can never fire after it succeeds or fails there;
    // looks like a candidate for removal — confirm against upstream history.
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT)) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
      }
    }

    // Otherwise extend (or truncate) first and mask in the destination type.
    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
      CombineTo(N, ExtLoad);
      // When the load's value result had no other users, only its chain
      // result needs to be redirected to the new load; otherwise replace
      // both the value (with the truncated extload) and the chain.
      if (NoReplaceTrunc)
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      else
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  // NOTE(review): the last clause uses '&&', so this fold only runs before
  // operation legalization; sibling folds above use the
  // '!LegalOperations || TLI.isOperationLegal(...)' pattern instead — verify
  // the conjunction here is intentional.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        if (N0.getOpcode() == ISD::AND) {
          // If the (and (load)) would already match as a narrower zextload,
          // forming an extload here would fight that combine; bail out.
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          auto NarrowLoad = false;
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT, LoadedVT;
          if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
                               NarrowLoad))
            DoXform = false;
        }
        if (DoXform)
          DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
                                            ISD::ZERO_EXTEND, SetCCs, TLI);
      }
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        // As above: with no other users of the load value, only reroute the
        // chain; otherwise replace value and chain together.
        if (NoReplaceTrunc)
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
        else
          CombineTo(LN0, Trunc, ExtLoad.getValue(1));
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the sext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingElementType = EVT::getIntegerVT(
          *DAG.getContext(), N00VT.getScalarSizeInBits());
      EVT MatchingVectorType = EVT::getVectorVT(
          *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      // Mask with 1s so only the low bit of each lane survives the sext/trunc.
      return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      // KnownZeroBits is the number of high bits the inner zext guarantees
      // to be zero; only those may safely be shifted out.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
7824 
// Simplify an ANY_EXTEND node: fold constants, collapse nested extensions,
// fold through truncates and masks, form extloads, and lower aext(setcc).
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      // N0 is the non-extending load itself here, so this checks whether the
      // load's value result has any users besides N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      // Sole user: only the chain result needs rerouting to the new load;
      // otherwise replace both the value (via Trunc) and the chain.
      if (NoReplaceTrunc)
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      else
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      // Re-issue the extending load at the wider type, keeping the original
      // extension kind (sext/zext/ext) so the loaded bits keep their meaning.
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                        N0.getOperand(1),
                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}
7970 
7971 SDValue DAGCombiner::visitAssertZext(SDNode *N) {
7972   SDValue N0 = N->getOperand(0);
7973   SDValue N1 = N->getOperand(1);
7974   EVT EVT = cast<VTSDNode>(N1)->getVT();
7975 
7976   // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
7977   if (N0.getOpcode() == ISD::AssertZext &&
7978       EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
7979     return N0;
7980 
7981   return SDValue();
7982 }
7983 
7984 /// If the result of a wider load is shifted to right of N  bits and then
7985 /// truncated to a narrower type and where N is a multiple of number of bits of
7986 /// the narrower type, transform it to a narrower load from address + N / num of
7987 /// bits of new type. If the result is to be extended, also fold the extension
7988 /// to form a extending load.
7989 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
7990   unsigned Opc = N->getOpcode();
7991 
7992   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
7993   SDValue N0 = N->getOperand(0);
7994   EVT VT = N->getValueType(0);
7995   EVT ExtVT = VT;
7996 
7997   // This transformation isn't valid for vector loads.
7998   if (VT.isVector())
7999     return SDValue();
8000 
8001   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
8002   // extended to VT.
8003   if (Opc == ISD::SIGN_EXTEND_INREG) {
8004     ExtType = ISD::SEXTLOAD;
8005     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
8006   } else if (Opc == ISD::SRL) {
8007     // Another special-case: SRL is basically zero-extending a narrower value.
8008     ExtType = ISD::ZEXTLOAD;
8009     N0 = SDValue(N, 0);
8010     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8011     if (!N01) return SDValue();
8012     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
8013                               VT.getSizeInBits() - N01->getZExtValue());
8014   }
8015   if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
8016     return SDValue();
8017 
8018   unsigned EVTBits = ExtVT.getSizeInBits();
8019 
8020   // Do not generate loads of non-round integer types since these can
8021   // be expensive (and would be wrong if the type is not byte sized).
8022   if (!ExtVT.isRound())
8023     return SDValue();
8024 
8025   unsigned ShAmt = 0;
8026   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
8027     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
8028       ShAmt = N01->getZExtValue();
8029       // Is the shift amount a multiple of size of VT?
8030       if ((ShAmt & (EVTBits-1)) == 0) {
8031         N0 = N0.getOperand(0);
8032         // Is the load width a multiple of size of VT?
8033         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
8034           return SDValue();
8035       }
8036 
8037       // At this point, we must have a load or else we can't do the transform.
8038       if (!isa<LoadSDNode>(N0)) return SDValue();
8039 
8040       // Because a SRL must be assumed to *need* to zero-extend the high bits
8041       // (as opposed to anyext the high bits), we can't combine the zextload
8042       // lowering of SRL and an sextload.
8043       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
8044         return SDValue();
8045 
8046       // If the shift amount is larger than the input type then we're not
8047       // accessing any of the loaded bytes.  If the load was a zextload/extload
8048       // then the result of the shift+trunc is zero/undef (handled elsewhere).
8049       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
8050         return SDValue();
8051     }
8052   }
8053 
8054   // If the load is shifted left (and the result isn't shifted back right),
8055   // we can fold the truncate through the shift.
8056   unsigned ShLeftAmt = 0;
8057   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
8058       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
8059     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
8060       ShLeftAmt = N01->getZExtValue();
8061       N0 = N0.getOperand(0);
8062     }
8063   }
8064 
8065   // If we haven't found a load, we can't narrow it.  Don't transform one with
8066   // multiple uses, this would require adding a new load.
8067   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
8068     return SDValue();
8069 
8070   // Don't change the width of a volatile load.
8071   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8072   if (LN0->isVolatile())
8073     return SDValue();
8074 
8075   // Verify that we are actually reducing a load width here.
8076   if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
8077     return SDValue();
8078 
8079   // For the transform to be legal, the load must produce only two values
8080   // (the value loaded and the chain).  Don't transform a pre-increment
8081   // load, for example, which produces an extra value.  Otherwise the
8082   // transformation is not equivalent, and the downstream logic to replace
8083   // uses gets things wrong.
8084   if (LN0->getNumValues() > 2)
8085     return SDValue();
8086 
8087   // If the load that we're shrinking is an extload and we're not just
8088   // discarding the extension we can't simply shrink the load. Bail.
8089   // TODO: It would be possible to merge the extensions in some cases.
8090   if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
8091       LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
8092     return SDValue();
8093 
8094   if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
8095     return SDValue();
8096 
8097   EVT PtrType = N0.getOperand(1).getValueType();
8098 
8099   if (PtrType == MVT::Untyped || PtrType.isExtended())
8100     // It's not possible to generate a constant of extended or untyped type.
8101     return SDValue();
8102 
8103   // For big endian targets, we need to adjust the offset to the pointer to
8104   // load the correct bytes.
8105   if (DAG.getDataLayout().isBigEndian()) {
8106     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
8107     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
8108     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
8109   }
8110 
8111   uint64_t PtrOff = ShAmt / 8;
8112   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
8113   SDLoc DL(LN0);
8114   // The original load itself didn't wrap, so an offset within it doesn't.
8115   SDNodeFlags Flags;
8116   Flags.setNoUnsignedWrap(true);
8117   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
8118                                PtrType, LN0->getBasePtr(),
8119                                DAG.getConstant(PtrOff, DL, PtrType),
8120                                Flags);
8121   AddToWorklist(NewPtr.getNode());
8122 
8123   SDValue Load;
8124   if (ExtType == ISD::NON_EXTLOAD)
8125     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
8126                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8127                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8128   else
8129     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
8130                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
8131                           NewAlign, LN0->getMemOperand()->getFlags(),
8132                           LN0->getAAInfo());
8133 
8134   // Replace the old load's chain with the new load's chain.
8135   WorklistRemover DeadNodes(*this);
8136   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8137 
8138   // Shift the result left, if we've swallowed a left shift.
8139   SDValue Result = Load;
8140   if (ShLeftAmt != 0) {
8141     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
8142     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
8143       ShImmTy = VT;
8144     // If the shift amount is as large as the result size (but, presumably,
8145     // no larger than the source) then the useful bits of the result are
8146     // zero; we can't simply return the shortened shift, because the result
8147     // of that operation is undefined.
8148     SDLoc DL(N0);
8149     if (ShLeftAmt >= VT.getSizeInBits())
8150       Result = DAG.getConstant(0, DL, VT);
8151     else
8152       Result = DAG.getNode(ISD::SHL, DL, VT,
8153                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
8154   }
8155 
8156   // Return the new loaded value.
8157   return Result;
8158 }
8159 
// Try to simplify a SIGN_EXTEND_INREG node: either drop the in-register
// extension (when the sign bits are already known correct), merge it with a
// neighboring extend/shift, or turn it into a narrower sign-extending load.
// The folds are ordered; each early return deliberately preempts the later,
// more expensive ones.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // Note: this local intentionally shadows the llvm::EVT type name; it is the
  // narrow type we are sign-extending *from*, carried in the VTSDNode operand.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned EVTBits = EVT.getScalarSizeInBits();

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  // (Re-emitting the node lets getNode constant-fold it.)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
  // The inner extension's source element must be exactly EVTBits wide so
  // the sign bit lands in the right place.
  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
      return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
  }

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  // (The mask tests exactly the sign bit of the narrow type.)
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  // Legal pre-legalization for any non-volatile load; otherwise the target
  // must support the sign-extending load directly.
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  return SDValue();
}
8285 
8286 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
8287   SDValue N0 = N->getOperand(0);
8288   EVT VT = N->getValueType(0);
8289 
8290   if (N0.isUndef())
8291     return DAG.getUNDEF(VT);
8292 
8293   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8294                                               LegalOperations))
8295     return SDValue(Res, 0);
8296 
8297   return SDValue();
8298 }
8299 
8300 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
8301   SDValue N0 = N->getOperand(0);
8302   EVT VT = N->getValueType(0);
8303 
8304   if (N0.isUndef())
8305     return DAG.getUNDEF(VT);
8306 
8307   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8308                                               LegalOperations))
8309     return SDValue(Res, 0);
8310 
8311   return SDValue();
8312 }
8313 
// Try to simplify a TRUNCATE node: eliminate it when it cancels an extend,
// push it through selects/shifts/concats, or narrow the feeding load.  Fold
// order matters; each early return preempts the later transforms.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
  // (Re-emitting the node lets getNode constant-fold it.)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, than we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {

    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Re-view the vector as SizeRatio-times-more elements of the narrow type.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      // The truncated (low-order) bits live in the first sub-element on
      // little-endian targets and in the last one on big-endian targets.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known;
    DAG.computeKnownBits(Amt, Known);
    unsigned Size = VT.getScalarSizeInBits();
    // The shift amount's known maximum must fit in the narrow type, i.e.
    // be representable in Log2(Size) bits.
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {

    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset-th operand of the wide build vector.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Keep the chain users of the old load pointing at the new load.
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    // Scan the concat operands, recording the single non-undef one (if any)
    // and the truncated type of each position.
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT SrcVT = VecSrc.getValueType();
    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
      SDLoc SL(N);

      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      // The low-order element is element 0 on little-endian targets and the
      // last element on big-endian targets.
      unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
                         VecSrc, DAG.getConstant(Idx, SL, IdxVT));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
8566 
8567 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
8568   SDValue Elt = N->getOperand(i);
8569   if (Elt.getOpcode() != ISD::MERGE_VALUES)
8570     return Elt.getNode();
8571   return Elt.getOperand(Elt.getResNo()).getNode();
8572 }
8573 
8574 /// build_pair (load, load) -> load
8575 /// if load locations are consecutive.
8576 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
8577   assert(N->getOpcode() == ISD::BUILD_PAIR);
8578 
8579   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
8580   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
8581   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
8582       LD1->getAddressSpace() != LD2->getAddressSpace())
8583     return SDValue();
8584   EVT LD1VT = LD1->getValueType(0);
8585   unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
8586   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
8587       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
8588     unsigned Align = LD1->getAlignment();
8589     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
8590         VT.getTypeForEVT(*DAG.getContext()));
8591 
8592     if (NewAlign <= Align &&
8593         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
8594       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
8595                          LD1->getPointerInfo(), Align);
8596   }
8597 
8598   return SDValue();
8599 }
8600 
8601 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
8602   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
8603   // and Lo parts; on big-endian machines it doesn't.
8604   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
8605 }
8606 
8607 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
8608                                     const TargetLowering &TLI) {
8609   // If this is not a bitcast to an FP type or if the target doesn't have
8610   // IEEE754-compliant FP logic, we're done.
8611   EVT VT = N->getValueType(0);
8612   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
8613     return SDValue();
8614 
8615   // TODO: Use splat values for the constant-checking below and remove this
8616   // restriction.
8617   SDValue N0 = N->getOperand(0);
8618   EVT SourceVT = N0.getValueType();
8619   if (SourceVT.isVector())
8620     return SDValue();
8621 
8622   unsigned FPOpcode;
8623   APInt SignMask;
8624   switch (N0.getOpcode()) {
8625   case ISD::AND:
8626     FPOpcode = ISD::FABS;
8627     SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
8628     break;
8629   case ISD::XOR:
8630     FPOpcode = ISD::FNEG;
8631     SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
8632     break;
8633   // TODO: ISD::OR --> ISD::FNABS?
8634   default:
8635     return SDValue();
8636   }
8637 
8638   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
8639   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
8640   SDValue LogicOp0 = N0.getOperand(0);
8641   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8642   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
8643       LogicOp0.getOpcode() == ISD::BITCAST &&
8644       LogicOp0->getOperand(0).getValueType() == VT)
8645     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
8646 
8647   return SDValue();
8648 }
8649 
8650 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
8651   SDValue N0 = N->getOperand(0);
8652   EVT VT = N->getValueType(0);
8653 
8654   if (N0.isUndef())
8655     return DAG.getUNDEF(VT);
8656 
8657   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
8658   // Only do this before legalize, since afterward the target may be depending
8659   // on the bitconvert.
8660   // First check to see if this is all constant.
8661   if (!LegalTypes &&
8662       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
8663       VT.isVector()) {
8664     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
8665 
8666     EVT DestEltVT = N->getValueType(0).getVectorElementType();
8667     assert(!DestEltVT.isVector() &&
8668            "Element type of vector ValueType must not be vector!");
8669     if (isSimple)
8670       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
8671   }
8672 
8673   // If the input is a constant, let getNode fold it.
8674   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
8675     // If we can't allow illegal operations, we need to check that this is just
8676     // a fp -> int or int -> conversion and that the resulting operation will
8677     // be legal.
8678     if (!LegalOperations ||
8679         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
8680          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
8681         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
8682          TLI.isOperationLegal(ISD::Constant, VT)))
8683       return DAG.getBitcast(VT, N0);
8684   }
8685 
8686   // (conv (conv x, t1), t2) -> (conv x, t2)
8687   if (N0.getOpcode() == ISD::BITCAST)
8688     return DAG.getBitcast(VT, N0.getOperand(0));
8689 
8690   // fold (conv (load x)) -> (load (conv*)x)
8691   // If the resultant load doesn't need a higher alignment than the original!
8692   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
8693       // Do not change the width of a volatile load.
8694       !cast<LoadSDNode>(N0)->isVolatile() &&
8695       // Do not remove the cast if the types differ in endian layout.
8696       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
8697           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
8698       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
8699       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
8700     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8701     unsigned OrigAlign = LN0->getAlignment();
8702 
8703     bool Fast = false;
8704     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8705                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
8706         Fast) {
8707       SDValue Load =
8708           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
8709                       LN0->getPointerInfo(), OrigAlign,
8710                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8711       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8712       return Load;
8713     }
8714   }
8715 
8716   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
8717     return V;
8718 
8719   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
8720   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
8721   //
8722   // For ppc_fp128:
8723   // fold (bitcast (fneg x)) ->
8724   //     flipbit = signbit
8725   //     (xor (bitcast x) (build_pair flipbit, flipbit))
8726   //
8727   // fold (bitcast (fabs x)) ->
8728   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
8729   //     (xor (bitcast x) (build_pair flipbit, flipbit))
8730   // This often reduces constant pool loads.
8731   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
8732        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
8733       N0.getNode()->hasOneUse() && VT.isInteger() &&
8734       !VT.isVector() && !N0.getValueType().isVector()) {
8735     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
8736     AddToWorklist(NewConv.getNode());
8737 
8738     SDLoc DL(N);
8739     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
8740       assert(VT.getSizeInBits() == 128);
8741       SDValue SignBit = DAG.getConstant(
8742           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
8743       SDValue FlipBit;
8744       if (N0.getOpcode() == ISD::FNEG) {
8745         FlipBit = SignBit;
8746         AddToWorklist(FlipBit.getNode());
8747       } else {
8748         assert(N0.getOpcode() == ISD::FABS);
8749         SDValue Hi =
8750             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
8751                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8752                                               SDLoc(NewConv)));
8753         AddToWorklist(Hi.getNode());
8754         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
8755         AddToWorklist(FlipBit.getNode());
8756       }
8757       SDValue FlipBits =
8758           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8759       AddToWorklist(FlipBits.getNode());
8760       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
8761     }
8762     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
8763     if (N0.getOpcode() == ISD::FNEG)
8764       return DAG.getNode(ISD::XOR, DL, VT,
8765                          NewConv, DAG.getConstant(SignBit, DL, VT));
8766     assert(N0.getOpcode() == ISD::FABS);
8767     return DAG.getNode(ISD::AND, DL, VT,
8768                        NewConv, DAG.getConstant(~SignBit, DL, VT));
8769   }
8770 
8771   // fold (bitconvert (fcopysign cst, x)) ->
8772   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
8773   // Note that we don't handle (copysign x, cst) because this can always be
8774   // folded to an fneg or fabs.
8775   //
8776   // For ppc_fp128:
8777   // fold (bitcast (fcopysign cst, x)) ->
8778   //     flipbit = (and (extract_element
8779   //                     (xor (bitcast cst), (bitcast x)), 0),
8780   //                    signbit)
8781   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
8782   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
8783       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
8784       VT.isInteger() && !VT.isVector()) {
8785     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
8786     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
8787     if (isTypeLegal(IntXVT)) {
8788       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
8789       AddToWorklist(X.getNode());
8790 
8791       // If X has a different width than the result/lhs, sext it or truncate it.
8792       unsigned VTWidth = VT.getSizeInBits();
8793       if (OrigXWidth < VTWidth) {
8794         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
8795         AddToWorklist(X.getNode());
8796       } else if (OrigXWidth > VTWidth) {
8797         // To get the sign bit in the right place, we have to shift it right
8798         // before truncating.
8799         SDLoc DL(X);
8800         X = DAG.getNode(ISD::SRL, DL,
8801                         X.getValueType(), X,
8802                         DAG.getConstant(OrigXWidth-VTWidth, DL,
8803                                         X.getValueType()));
8804         AddToWorklist(X.getNode());
8805         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
8806         AddToWorklist(X.getNode());
8807       }
8808 
8809       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
8810         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
8811         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
8812         AddToWorklist(Cst.getNode());
8813         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
8814         AddToWorklist(X.getNode());
8815         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
8816         AddToWorklist(XorResult.getNode());
8817         SDValue XorResult64 = DAG.getNode(
8818             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
8819             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8820                                   SDLoc(XorResult)));
8821         AddToWorklist(XorResult64.getNode());
8822         SDValue FlipBit =
8823             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
8824                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
8825         AddToWorklist(FlipBit.getNode());
8826         SDValue FlipBits =
8827             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8828         AddToWorklist(FlipBits.getNode());
8829         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
8830       }
8831       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
8832       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
8833                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
8834       AddToWorklist(X.getNode());
8835 
8836       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
8837       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
8838                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
8839       AddToWorklist(Cst.getNode());
8840 
8841       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
8842     }
8843   }
8844 
8845   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
8846   if (N0.getOpcode() == ISD::BUILD_PAIR)
8847     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
8848       return CombineLD;
8849 
8850   // Remove double bitcasts from shuffles - this is often a legacy of
8851   // XformToShuffleWithZero being used to combine bitmaskings (of
8852   // float vectors bitcast to integer vectors) into shuffles.
8853   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
8854   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
8855       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
8856       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
8857       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
8858     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
8859 
8860     // If operands are a bitcast, peek through if it casts the original VT.
8861     // If operands are a constant, just bitcast back to original VT.
8862     auto PeekThroughBitcast = [&](SDValue Op) {
8863       if (Op.getOpcode() == ISD::BITCAST &&
8864           Op.getOperand(0).getValueType() == VT)
8865         return SDValue(Op.getOperand(0));
8866       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
8867           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
8868         return DAG.getBitcast(VT, Op);
8869       return SDValue();
8870     };
8871 
8872     // FIXME: If either input vector is bitcast, try to convert the shuffle to
8873     // the result type of this bitcast. This would eliminate at least one
8874     // bitcast. See the transform in InstCombine.
8875     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
8876     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
8877     if (!(SV0 && SV1))
8878       return SDValue();
8879 
8880     int MaskScale =
8881         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
8882     SmallVector<int, 8> NewMask;
8883     for (int M : SVN->getMask())
8884       for (int i = 0; i != MaskScale; ++i)
8885         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
8886 
8887     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
8888     if (!LegalMask) {
8889       std::swap(SV0, SV1);
8890       ShuffleVectorSDNode::commuteMask(NewMask);
8891       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
8892     }
8893 
8894     if (LegalMask)
8895       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
8896   }
8897 
8898   return SDValue();
8899 }
8900 
8901 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
8902   EVT VT = N->getValueType(0);
8903   return CombineConsecutiveLoads(N, VT);
8904 }
8905 
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
/// Constant-folds a bitcast of the whole vector by recomputing each element:
/// same-width element casts are folded element-wise, FP elements are routed
/// through same-width integers (recursively), and integer elements are either
/// merged (growing) or split (shrinking) with endian-aware chunk ordering.
/// Returns SDValue(BV, 0) unchanged when no conversion is needed.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                         DAG.getBitcast(DstEltVT, BV->getOperand(0)));

    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // On little-endian targets the first source element is the
        // least-significant chunk of the output, so walk the inputs in
        // reverse while accumulating with left shifts; big-endian reads
        // them in order.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // The merged element is undef only if every contributing input was
      // undef; any defined input pins the whole element.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    // An undef input expands to a full run of undef output elements.
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Peel DstBitSize-wide chunks off the low end, least-significant first.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal.lshrInPlace(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}
9027 
9028 static bool isContractable(SDNode *N) {
9029   SDNodeFlags F = N->getFlags();
9030   return F.hasAllowContract() || F.hasUnsafeAlgebra();
9031 }
9032 
/// Try to perform FMA combining on a given FADD node.
/// Folds (fadd (fmul x, y), z) and related shapes (including through
/// FP_EXTEND and nested FMA operands when the target is aggressive) into
/// FMA/FMAD nodes, provided the target supports a fused opcode and fusion
/// is permitted either globally (fp-contract=fast / unsafe-fp-math / FMAD
/// legality) or per-node via the contract fast-math flag.
/// Returns the fused node, or SDValue() if no combine applies.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Fusion without per-node flags is allowed when fp-contract=fast or
  // unsafe FP math is on; FMAD keeps the intermediate rounding, so forming
  // it does not change numerics and is always acceptable.
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              Options.UnsafeFPMath || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Some targets prefer to form FMAs later, in the machine combiner; defer
  // to them rather than duplicating the work here.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool LookThroughFPExt = TLI.isFPExtFree(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0);
  }

  // Look through FP_EXTEND nodes to do more combining.
  // Only done when the target reports fpext as free for this type.
  if (LookThroughFPExt) {
    // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (isContractableFMUL(N00))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(1)), N1);
    }

    // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (isContractableFMUL(N10))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(1)), N0);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1));
    }

    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0));
    }

    if (LookThroughFPExt) {
      // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
      //   -> (fma x, y, (fma (fpext u), (fpext v), z))
      auto FoldFAddFMAFPExtFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == PreferredFusedOpcode) {
        SDValue N02 = N0.getOperand(2);
        if (N02.getOpcode() == ISD::FP_EXTEND) {
          SDValue N020 = N02.getOperand(0);
          if (isContractableFMUL(N020))
            return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                        N020.getOperand(0), N020.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
      //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      auto FoldFAddFPExtFMAFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == PreferredFusedOpcode) {
          SDValue N002 = N00.getOperand(2);
          if (isContractableFMUL(N002))
            return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                        N002.getOperand(0), N002.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
      //   -> (fma y, z, (fma (fpext u), (fpext v), x))
      if (N1.getOpcode() == PreferredFusedOpcode) {
        SDValue N12 = N1.getOperand(2);
        if (N12.getOpcode() == ISD::FP_EXTEND) {
          SDValue N120 = N12.getOperand(0);
          if (isContractableFMUL(N120))
            return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                        N120.getOperand(0), N120.getOperand(1),
                                        N0);
        }
      }

      // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
      //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == PreferredFusedOpcode) {
          SDValue N102 = N10.getOperand(2);
          if (isContractableFMUL(N102))
            return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                        N102.getOperand(0), N102.getOperand(1),
                                        N0);
        }
      }
    }
  }

  // No pattern matched.
  return SDValue();
}
9235 
9236 /// Try to perform FMA combining on a given FSUB node.
9237 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
9238   SDValue N0 = N->getOperand(0);
9239   SDValue N1 = N->getOperand(1);
9240   EVT VT = N->getValueType(0);
9241   SDLoc SL(N);
9242 
9243   const TargetOptions &Options = DAG.getTarget().Options;
9244   // Floating-point multiply-add with intermediate rounding.
9245   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9246 
9247   // Floating-point multiply-add without intermediate rounding.
9248   bool HasFMA =
9249       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9250       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9251 
9252   // No valid opcode, do not combine.
9253   if (!HasFMAD && !HasFMA)
9254     return SDValue();
9255 
9256   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
9257                               Options.UnsafeFPMath || HasFMAD);
9258   // If the subtraction is not contractable, do not combine.
9259   if (!AllowFusionGlobally && !isContractable(N))
9260     return SDValue();
9261 
9262   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
9263   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
9264     return SDValue();
9265 
9266   // Always prefer FMAD to FMA for precision.
9267   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9268   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9269   bool LookThroughFPExt = TLI.isFPExtFree(VT);
9270 
9271   // Is the node an FMUL and contractable either due to global flags or
9272   // SDNodeFlags.
9273   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
9274     if (N.getOpcode() != ISD::FMUL)
9275       return false;
9276     return AllowFusionGlobally || isContractable(N.getNode());
9277   };
9278 
9279   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
9280   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
9281     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9282                        N0.getOperand(0), N0.getOperand(1),
9283                        DAG.getNode(ISD::FNEG, SL, VT, N1));
9284   }
9285 
9286   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
9287   // Note: Commutes FSUB operands.
9288   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
9289     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9290                        DAG.getNode(ISD::FNEG, SL, VT,
9291                                    N1.getOperand(0)),
9292                        N1.getOperand(1), N0);
9293 
9294   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
9295   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
9296       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
9297     SDValue N00 = N0.getOperand(0).getOperand(0);
9298     SDValue N01 = N0.getOperand(0).getOperand(1);
9299     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9300                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
9301                        DAG.getNode(ISD::FNEG, SL, VT, N1));
9302   }
9303 
9304   // Look through FP_EXTEND nodes to do more combining.
9305   if (LookThroughFPExt) {
9306     // fold (fsub (fpext (fmul x, y)), z)
9307     //   -> (fma (fpext x), (fpext y), (fneg z))
9308     if (N0.getOpcode() == ISD::FP_EXTEND) {
9309       SDValue N00 = N0.getOperand(0);
9310       if (isContractableFMUL(N00))
9311         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9312                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9313                                        N00.getOperand(0)),
9314                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9315                                        N00.getOperand(1)),
9316                            DAG.getNode(ISD::FNEG, SL, VT, N1));
9317     }
9318 
9319     // fold (fsub x, (fpext (fmul y, z)))
9320     //   -> (fma (fneg (fpext y)), (fpext z), x)
9321     // Note: Commutes FSUB operands.
9322     if (N1.getOpcode() == ISD::FP_EXTEND) {
9323       SDValue N10 = N1.getOperand(0);
9324       if (isContractableFMUL(N10))
9325         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9326                            DAG.getNode(ISD::FNEG, SL, VT,
9327                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
9328                                                    N10.getOperand(0))),
9329                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9330                                        N10.getOperand(1)),
9331                            N0);
9332     }
9333 
9334     // fold (fsub (fpext (fneg (fmul, x, y))), z)
9335     //   -> (fneg (fma (fpext x), (fpext y), z))
9336     // Note: This could be removed with appropriate canonicalization of the
9337     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9338     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9339     // from implementing the canonicalization in visitFSUB.
9340     if (N0.getOpcode() == ISD::FP_EXTEND) {
9341       SDValue N00 = N0.getOperand(0);
9342       if (N00.getOpcode() == ISD::FNEG) {
9343         SDValue N000 = N00.getOperand(0);
9344         if (isContractableFMUL(N000)) {
9345           return DAG.getNode(ISD::FNEG, SL, VT,
9346                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9347                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9348                                                      N000.getOperand(0)),
9349                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9350                                                      N000.getOperand(1)),
9351                                          N1));
9352         }
9353       }
9354     }
9355 
9356     // fold (fsub (fneg (fpext (fmul, x, y))), z)
9357     //   -> (fneg (fma (fpext x)), (fpext y), z)
9358     // Note: This could be removed with appropriate canonicalization of the
9359     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9360     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9361     // from implementing the canonicalization in visitFSUB.
9362     if (N0.getOpcode() == ISD::FNEG) {
9363       SDValue N00 = N0.getOperand(0);
9364       if (N00.getOpcode() == ISD::FP_EXTEND) {
9365         SDValue N000 = N00.getOperand(0);
9366         if (isContractableFMUL(N000)) {
9367           return DAG.getNode(ISD::FNEG, SL, VT,
9368                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9369                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9370                                                      N000.getOperand(0)),
9371                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9372                                                      N000.getOperand(1)),
9373                                          N1));
9374         }
9375       }
9376     }
9377 
9378   }
9379 
9380   // More folding opportunities when target permits.
9381   if (Aggressive) {
9382     // fold (fsub (fma x, y, (fmul u, v)), z)
9383     //   -> (fma x, y (fma u, v, (fneg z)))
9384     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9385     // are currently only supported on binary nodes.
9386     if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
9387         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
9388         N0.getOperand(2)->hasOneUse()) {
9389       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9390                          N0.getOperand(0), N0.getOperand(1),
9391                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9392                                      N0.getOperand(2).getOperand(0),
9393                                      N0.getOperand(2).getOperand(1),
9394                                      DAG.getNode(ISD::FNEG, SL, VT,
9395                                                  N1)));
9396     }
9397 
9398     // fold (fsub x, (fma y, z, (fmul u, v)))
9399     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
9400     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9401     // are currently only supported on binary nodes.
9402     if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
9403         isContractableFMUL(N1.getOperand(2))) {
9404       SDValue N20 = N1.getOperand(2).getOperand(0);
9405       SDValue N21 = N1.getOperand(2).getOperand(1);
9406       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9407                          DAG.getNode(ISD::FNEG, SL, VT,
9408                                      N1.getOperand(0)),
9409                          N1.getOperand(1),
9410                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9411                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
9412 
9413                                      N21, N0));
9414     }
9415 
9416     if (LookThroughFPExt) {
9417       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
9418       //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
9419       if (N0.getOpcode() == PreferredFusedOpcode) {
9420         SDValue N02 = N0.getOperand(2);
9421         if (N02.getOpcode() == ISD::FP_EXTEND) {
9422           SDValue N020 = N02.getOperand(0);
9423           if (isContractableFMUL(N020))
9424             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9425                                N0.getOperand(0), N0.getOperand(1),
9426                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9427                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9428                                                        N020.getOperand(0)),
9429                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9430                                                        N020.getOperand(1)),
9431                                            DAG.getNode(ISD::FNEG, SL, VT,
9432                                                        N1)));
9433         }
9434       }
9435 
9436       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
9437       //   -> (fma (fpext x), (fpext y),
9438       //           (fma (fpext u), (fpext v), (fneg z)))
9439       // FIXME: This turns two single-precision and one double-precision
9440       // operation into two double-precision operations, which might not be
9441       // interesting for all targets, especially GPUs.
9442       if (N0.getOpcode() == ISD::FP_EXTEND) {
9443         SDValue N00 = N0.getOperand(0);
9444         if (N00.getOpcode() == PreferredFusedOpcode) {
9445           SDValue N002 = N00.getOperand(2);
9446           if (isContractableFMUL(N002))
9447             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9448                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9449                                            N00.getOperand(0)),
9450                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9451                                            N00.getOperand(1)),
9452                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9453                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9454                                                        N002.getOperand(0)),
9455                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9456                                                        N002.getOperand(1)),
9457                                            DAG.getNode(ISD::FNEG, SL, VT,
9458                                                        N1)));
9459         }
9460       }
9461 
9462       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
9463       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
9464       if (N1.getOpcode() == PreferredFusedOpcode &&
9465         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
9466         SDValue N120 = N1.getOperand(2).getOperand(0);
9467         if (isContractableFMUL(N120)) {
9468           SDValue N1200 = N120.getOperand(0);
9469           SDValue N1201 = N120.getOperand(1);
9470           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9471                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
9472                              N1.getOperand(1),
9473                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9474                                          DAG.getNode(ISD::FNEG, SL, VT,
9475                                              DAG.getNode(ISD::FP_EXTEND, SL,
9476                                                          VT, N1200)),
9477                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9478                                                      N1201),
9479                                          N0));
9480         }
9481       }
9482 
9483       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
9484       //   -> (fma (fneg (fpext y)), (fpext z),
9485       //           (fma (fneg (fpext u)), (fpext v), x))
9486       // FIXME: This turns two single-precision and one double-precision
9487       // operation into two double-precision operations, which might not be
9488       // interesting for all targets, especially GPUs.
9489       if (N1.getOpcode() == ISD::FP_EXTEND &&
9490         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
9491         SDValue N100 = N1.getOperand(0).getOperand(0);
9492         SDValue N101 = N1.getOperand(0).getOperand(1);
9493         SDValue N102 = N1.getOperand(0).getOperand(2);
9494         if (isContractableFMUL(N102)) {
9495           SDValue N1020 = N102.getOperand(0);
9496           SDValue N1021 = N102.getOperand(1);
9497           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9498                              DAG.getNode(ISD::FNEG, SL, VT,
9499                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9500                                                      N100)),
9501                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
9502                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9503                                          DAG.getNode(ISD::FNEG, SL, VT,
9504                                              DAG.getNode(ISD::FP_EXTEND, SL,
9505                                                          VT, N1020)),
9506                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9507                                                      N1021),
9508                                          N0));
9509         }
9510       }
9511     }
9512   }
9513 
9514   return SDValue();
9515 }
9516 
9517 /// Try to perform FMA combining on a given FMUL node based on the distributive
9518 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
9519 /// subtraction instead of addition).
9520 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
9521   SDValue N0 = N->getOperand(0);
9522   SDValue N1 = N->getOperand(1);
9523   EVT VT = N->getValueType(0);
9524   SDLoc SL(N);
9525 
9526   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
9527 
9528   const TargetOptions &Options = DAG.getTarget().Options;
9529 
9530   // The transforms below are incorrect when x == 0 and y == inf, because the
9531   // intermediate multiplication produces a nan.
9532   if (!Options.NoInfsFPMath)
9533     return SDValue();
9534 
9535   // Floating-point multiply-add without intermediate rounding.
9536   bool HasFMA =
9537       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
9538       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9539       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9540 
9541   // Floating-point multiply-add with intermediate rounding. This can result
9542   // in a less precise result due to the changed rounding order.
9543   bool HasFMAD = Options.UnsafeFPMath &&
9544                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9545 
9546   // No valid opcode, do not combine.
9547   if (!HasFMAD && !HasFMA)
9548     return SDValue();
9549 
9550   // Always prefer FMAD to FMA for precision.
9551   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9552   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9553 
9554   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
9555   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
9556   auto FuseFADD = [&](SDValue X, SDValue Y) {
9557     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
9558       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9559       if (XC1 && XC1->isExactlyValue(+1.0))
9560         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9561       if (XC1 && XC1->isExactlyValue(-1.0))
9562         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9563                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9564     }
9565     return SDValue();
9566   };
9567 
9568   if (SDValue FMA = FuseFADD(N0, N1))
9569     return FMA;
9570   if (SDValue FMA = FuseFADD(N1, N0))
9571     return FMA;
9572 
9573   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
9574   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
9575   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
9576   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
9577   auto FuseFSUB = [&](SDValue X, SDValue Y) {
9578     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
9579       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
9580       if (XC0 && XC0->isExactlyValue(+1.0))
9581         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9582                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9583                            Y);
9584       if (XC0 && XC0->isExactlyValue(-1.0))
9585         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9586                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9587                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9588 
9589       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9590       if (XC1 && XC1->isExactlyValue(+1.0))
9591         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9592                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9593       if (XC1 && XC1->isExactlyValue(-1.0))
9594         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9595     }
9596     return SDValue();
9597   };
9598 
9599   if (SDValue FMA = FuseFSUB(N0, N1))
9600     return FMA;
9601   if (SDValue FMA = FuseFSUB(N1, N0))
9602     return FMA;
9603 
9604   return SDValue();
9605 }
9606 
9607 static bool isFMulNegTwo(SDValue &N) {
9608   if (N.getOpcode() != ISD::FMUL)
9609     return false;
9610   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
9611     return CFP->isExactlyValue(-2.0);
9612   return false;
9613 }
9614 
9615 SDValue DAGCombiner::visitFADD(SDNode *N) {
9616   SDValue N0 = N->getOperand(0);
9617   SDValue N1 = N->getOperand(1);
9618   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
9619   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
9620   EVT VT = N->getValueType(0);
9621   SDLoc DL(N);
9622   const TargetOptions &Options = DAG.getTarget().Options;
9623   const SDNodeFlags Flags = N->getFlags();
9624 
9625   // fold vector ops
9626   if (VT.isVector())
9627     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9628       return FoldedVOp;
9629 
9630   // fold (fadd c1, c2) -> c1 + c2
9631   if (N0CFP && N1CFP)
9632     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
9633 
9634   // canonicalize constant to RHS
9635   if (N0CFP && !N1CFP)
9636     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
9637 
9638   if (SDValue NewSel = foldBinOpIntoSelect(N))
9639     return NewSel;
9640 
9641   // fold (fadd A, (fneg B)) -> (fsub A, B)
9642   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9643       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
9644     return DAG.getNode(ISD::FSUB, DL, VT, N0,
9645                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9646 
9647   // fold (fadd (fneg A), B) -> (fsub B, A)
9648   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9649       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
9650     return DAG.getNode(ISD::FSUB, DL, VT, N1,
9651                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
9652 
9653   // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
9654   // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
9655   if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
9656       (isFMulNegTwo(N1) && N1.hasOneUse())) {
9657     bool N1IsFMul = isFMulNegTwo(N1);
9658     SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
9659     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
9660     return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
9661   }
9662 
9663   // FIXME: Auto-upgrade the target/function-level option.
9664   if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
9665     // fold (fadd A, 0) -> A
9666     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
9667       if (N1C->isZero())
9668         return N0;
9669   }
9670 
9671   // If 'unsafe math' is enabled, fold lots of things.
9672   if (Options.UnsafeFPMath) {
9673     // No FP constant should be created after legalization as Instruction
9674     // Selection pass has a hard time dealing with FP constants.
9675     bool AllowNewConst = (Level < AfterLegalizeDAG);
9676 
9677     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
9678     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
9679         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
9680       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
9681                          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
9682                                      Flags),
9683                          Flags);
9684 
9685     // If allowed, fold (fadd (fneg x), x) -> 0.0
9686     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
9687       return DAG.getConstantFP(0.0, DL, VT);
9688 
9689     // If allowed, fold (fadd x, (fneg x)) -> 0.0
9690     if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
9691       return DAG.getConstantFP(0.0, DL, VT);
9692 
9693     // We can fold chains of FADD's of the same value into multiplications.
9694     // This transform is not safe in general because we are reducing the number
9695     // of rounding steps.
9696     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
9697       if (N0.getOpcode() == ISD::FMUL) {
9698         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9699         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
9700 
9701         // (fadd (fmul x, c), x) -> (fmul x, c+1)
9702         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
9703           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9704                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9705           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
9706         }
9707 
9708         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
9709         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
9710             N1.getOperand(0) == N1.getOperand(1) &&
9711             N0.getOperand(0) == N1.getOperand(0)) {
9712           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9713                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9714           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
9715         }
9716       }
9717 
9718       if (N1.getOpcode() == ISD::FMUL) {
9719         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9720         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
9721 
9722         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
9723         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
9724           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9725                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9726           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
9727         }
9728 
9729         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
9730         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
9731             N0.getOperand(0) == N0.getOperand(1) &&
9732             N1.getOperand(0) == N0.getOperand(0)) {
9733           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9734                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9735           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
9736         }
9737       }
9738 
9739       if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
9740         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9741         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
9742         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
9743             (N0.getOperand(0) == N1)) {
9744           return DAG.getNode(ISD::FMUL, DL, VT,
9745                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
9746         }
9747       }
9748 
9749       if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
9750         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9751         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
9752         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
9753             N1.getOperand(0) == N0) {
9754           return DAG.getNode(ISD::FMUL, DL, VT,
9755                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
9756         }
9757       }
9758 
9759       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
9760       if (AllowNewConst &&
9761           N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
9762           N0.getOperand(0) == N0.getOperand(1) &&
9763           N1.getOperand(0) == N1.getOperand(1) &&
9764           N0.getOperand(0) == N1.getOperand(0)) {
9765         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
9766                            DAG.getConstantFP(4.0, DL, VT), Flags);
9767       }
9768     }
9769   } // enable-unsafe-fp-math
9770 
9771   // FADD -> FMA combines:
9772   if (SDValue Fused = visitFADDForFMACombine(N)) {
9773     AddToWorklist(Fused.getNode());
9774     return Fused;
9775   }
9776   return SDValue();
9777 }
9778 
/// Combine an FSUB node: constant-fold, turn subtraction of a freely-negatable
/// value into an addition, apply no-signed-zeros and unsafe-math
/// simplifications, and finally try to form a fused multiply-add.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  // isNegatibleForFree != 0 means B's negation can be materialized cheaply.
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // FIXME: Auto-upgrade the target/function-level option.
  if (Options.NoSignedZerosFPMath  || N->getFlags().hasNoSignedZeros()) {
    // (fsub 0, B) -> -B
    // Only valid without signed zeros: 0.0 - (+0.0) is +0.0, while
    // fneg(+0.0) is -0.0.
    if (N0CFP && N0CFP->isZero()) {
      // Prefer a free negation of B over emitting an explicit FNEG node.
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
    }
  }

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // (fsub A, 0) -> A
    if (N1CFP && N1CFP->isZero())
      return N0;

    // (fsub x, x) -> 0.0
    // Unsafe only: x could be NaN or Inf, where x - x is NaN, not 0.
    if (N0 == N1)
      return DAG.getConstantFP(0.0f, DL, VT);

    // (fsub x, (fadd x, y)) -> (fneg y)
    // (fsub x, (fadd y, x)) -> (fneg y)
    if (N1.getOpcode() == ISD::FADD) {
      SDValue N10 = N1->getOperand(0);
      SDValue N11 = N1->getOperand(1);

      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N11, DAG, LegalOperations);

      if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N10, DAG, LegalOperations);
    }
  }

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
9849 
/// Combine an FMUL node: constant-fold and canonicalize, apply unsafe-math
/// reassociation, strength-reduce multiplies by 2.0/-1.0, cancel double
/// negation, fold select-of-(+1,-1) patterns into fabs/fneg, and finally try
/// the distributive FMA combine.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (Options.UnsafeFPMath) {
    // fold (fmul A, 0) -> 0
    // Unsafe only: A could be NaN/Inf (result NaN) or negative (result -0.0).
    if (N1CFP && N1CFP->isZero())
      return N1;

    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);

      // Check 1: Make sure that the first operand of the inner multiply is NOT
      // a constant. Otherwise, we may induce infinite looping.
      if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
        // Check 2: Make sure that the second operand of the inner multiply and
        // the second operand of the outer multiply are constants.
        if ((N1CFP && isConstOrConstSplatFP(N01)) ||
            (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
        }
      }
    }

    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
    // during an early run of DAGCombiner can prevent folding with fmuls
    // inserted during lowering.
    if (N0.getOpcode() == ISD::FADD &&
        (N0.getOperand(0) == N0.getOperand(1)) &&
        N0.hasOneUse()) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  // Requires nnan/nsz: the select-based form yields +1.0*X for X == NaN and
  // distinguishes +0.0 from -0.0, while fabs does not.
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Normalize so that Select holds the select and X the other operand.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // Only handle a setcc comparing X itself against the constant 0.0.
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      // For less-than predicates, swap the arms so the code below can treat
      // every case as a greater-than comparison.
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        std::swap(TrueOpnd, FalseOpnd);
        // Fall through
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                   DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
10001 
/// Combine an FMA node: constant-fold, simplify multiplications by 0.0/1.0/
/// -1.0, canonicalize constants to the second operand, and apply unsafe-math
/// reassociations with nearby FMUL/FNEG nodes.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // Constant fold FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  if (Options.UnsafeFPMath) {
    // (fma 0, y, z) -> z and (fma x, 0, z) -> z.
    // Unsafe only: 0 * Inf/NaN would produce NaN, and -0.0 can be lost.
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  // TODO: FMA nodes should have flags that propagate to the created nodes.
  // For now, create a Flags object for use with all unsafe math transforms.
  SDNodeFlags Flags;
  Flags.setUnsafeAlgebra(true);

  if (Options.UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }
  }

  if (Options.UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  return SDValue();
}
10099 
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // Requires either global unsafe math or the arcp fast-math flag, since
  // dividing by a reciprocal changes the rounding of each result.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  if (!UnsafeMath && !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();
  if (!MinUses || N1->use_size() < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    // Only count uses where N1 is actually the divisor (operand 1), not the
    // dividend.
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if (Users.size() < MinUses)
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  // N itself was one of the users replaced above; returning SDValue(N, 0)
  // tells the caller that N has been combined.
  return SDValue(N, 0);  // N was replaced.
}
10163 
// Combine an FDIV node: constant folding, select hoisting, reciprocal and
// rsqrt estimate formation (unsafe-math only), double-negation cancellation,
// and merging of divisions that share a divisor.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  // Propagate the original node's fast-math flags onto every node we build.
  SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  // (getNode constant-folds when both operands are ConstantFP.)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  // Try to push the fdiv into the arms of a select.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    // X / sqrt(Y) -> X * rsqrt(Y)
    if (N1.getOpcode() == ISD::FSQRT) {
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fpext(sqrt(Y)) -> X * fpext(rsqrt(Y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fpround(sqrt(Y)) -> X * fpround(rsqrt(Y)), reusing the original
      // fp_round's second operand (its trunc flag).
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      // (A result of 2 appears to mean "cheaper when negated" — confirm
      // against isNegatibleForFree's definition, which is outside this chunk.)
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // Merge several divisions by the same divisor into multiplications by a
  // single shared reciprocal (subject to a target-specified use threshold).
  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
    return CombineRepeatedDivisors;

  return SDValue();
}
10276 
10277 SDValue DAGCombiner::visitFREM(SDNode *N) {
10278   SDValue N0 = N->getOperand(0);
10279   SDValue N1 = N->getOperand(1);
10280   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10281   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10282   EVT VT = N->getValueType(0);
10283 
10284   // fold (frem c1, c2) -> fmod(c1,c2)
10285   if (N0CFP && N1CFP)
10286     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
10287 
10288   if (SDValue NewSel = foldBinOpIntoSelect(N))
10289     return NewSel;
10290 
10291   return SDValue();
10292 }
10293 
10294 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
10295   if (!DAG.getTarget().Options.UnsafeFPMath)
10296     return SDValue();
10297 
10298   SDValue N0 = N->getOperand(0);
10299   if (TLI.isFsqrtCheap(N0, DAG))
10300     return SDValue();
10301 
10302   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
10303   // For now, create a Flags object for use with all unsafe math transforms.
10304   SDNodeFlags Flags;
10305   Flags.setUnsafeAlgebra(true);
10306   return buildSqrtEstimate(N0, Flags);
10307 }
10308 
10309 /// copysign(x, fp_extend(y)) -> copysign(x, y)
10310 /// copysign(x, fp_round(y)) -> copysign(x, y)
10311 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
10312   SDValue N1 = N->getOperand(1);
10313   if ((N1.getOpcode() == ISD::FP_EXTEND ||
10314        N1.getOpcode() == ISD::FP_ROUND)) {
10315     // Do not optimize out type conversion of f128 type yet.
10316     // For some targets like x86_64, configuration is changed to keep one f128
10317     // value in one SSE register, but instruction selection cannot handle
10318     // FCOPYSIGN on SSE registers yet.
10319     EVT N1VT = N1->getValueType(0);
10320     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
10321     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
10322   }
10323   return false;
10324 }
10325 
10326 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
10327   SDValue N0 = N->getOperand(0);
10328   SDValue N1 = N->getOperand(1);
10329   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10330   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10331   EVT VT = N->getValueType(0);
10332 
10333   if (N0CFP && N1CFP) // Constant fold
10334     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
10335 
10336   if (N1CFP) {
10337     const APFloat &V = N1CFP->getValueAPF();
10338     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
10339     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
10340     if (!V.isNegative()) {
10341       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
10342         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10343     } else {
10344       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10345         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
10346                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
10347     }
10348   }
10349 
10350   // copysign(fabs(x), y) -> copysign(x, y)
10351   // copysign(fneg(x), y) -> copysign(x, y)
10352   // copysign(copysign(x,z), y) -> copysign(x, y)
10353   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
10354       N0.getOpcode() == ISD::FCOPYSIGN)
10355     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
10356 
10357   // copysign(x, abs(y)) -> abs(x)
10358   if (N1.getOpcode() == ISD::FABS)
10359     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10360 
10361   // copysign(x, copysign(y,z)) -> copysign(x, z)
10362   if (N1.getOpcode() == ISD::FCOPYSIGN)
10363     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
10364 
10365   // copysign(x, fp_extend(y)) -> copysign(x, y)
10366   // copysign(x, fp_round(y)) -> copysign(x, y)
10367   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
10368     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
10369 
10370   return SDValue();
10371 }
10372 
// Combine a SINT_TO_FP node: constant folding, conversion to UINT_TO_FP when
// the sign bit is known zero, and folding of setcc inputs into SELECT_CC.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
    // (A true i1 is -1 when interpreted signed, hence the -1.0 arm.)
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0, cc)
    // (The zext makes the true value +1, hence the 1.0 arm here.)
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  return SDValue();
}
10426 
// Combine a UINT_TO_FP node: constant folding, conversion to SINT_TO_FP when
// the sign bit is known zero, and folding of setcc inputs into SELECT_CC.
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)

    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  return SDValue();
}
10466 
10467 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
10468 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
10469   SDValue N0 = N->getOperand(0);
10470   EVT VT = N->getValueType(0);
10471 
10472   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
10473     return SDValue();
10474 
10475   SDValue Src = N0.getOperand(0);
10476   EVT SrcVT = Src.getValueType();
10477   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
10478   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
10479 
10480   // We can safely assume the conversion won't overflow the output range,
10481   // because (for example) (uint8_t)18293.f is undefined behavior.
10482 
10483   // Since we can assume the conversion won't overflow, our decision as to
10484   // whether the input will fit in the float should depend on the minimum
10485   // of the input range and output range.
10486 
10487   // This means this is also safe for a signed input and unsigned output, since
10488   // a negative input would lead to undefined behavior.
10489   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
10490   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
10491   unsigned ActualSize = std::min(InputSize, OutputSize);
10492   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
10493 
10494   // We can only fold away the float conversion if the input range can be
10495   // represented exactly in the float range.
10496   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
10497     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
10498       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
10499                                                        : ISD::ZERO_EXTEND;
10500       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
10501     }
10502     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
10503       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
10504     return DAG.getBitcast(VT, Src);
10505   }
10506   return SDValue();
10507 }
10508 
10509 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
10510   SDValue N0 = N->getOperand(0);
10511   EVT VT = N->getValueType(0);
10512 
10513   // fold (fp_to_sint c1fp) -> c1
10514   if (isConstantFPBuildVectorOrConstantFP(N0))
10515     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
10516 
10517   return FoldIntToFPToInt(N, DAG);
10518 }
10519 
10520 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
10521   SDValue N0 = N->getOperand(0);
10522   EVT VT = N->getValueType(0);
10523 
10524   // fold (fp_to_uint c1fp) -> c1
10525   if (isConstantFPBuildVectorOrConstantFP(N0))
10526     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
10527 
10528   return FoldIntToFPToInt(N, DAG);
10529 }
10530 
// Combine an FP_ROUND node: constant folding, cancellation with FP_EXTEND,
// collapsing of double rounding, and hoisting past FCOPYSIGN.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  // Operand 1 is the "trunc" flag: the value 1 marks the round as known
  // value-preserving (see the double-round logic below).
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // The sign operand Y is unaffected by rounding the magnitude.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
10586 
10587 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
10588   SDValue N0 = N->getOperand(0);
10589   EVT VT = N->getValueType(0);
10590   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10591   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10592 
10593   // fold (fp_round_inreg c1fp) -> c1fp
10594   if (N0CFP && isTypeLegal(EVT)) {
10595     SDLoc DL(N);
10596     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
10597     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
10598   }
10599 
10600   return SDValue();
10601 }
10602 
// Combine an FP_EXTEND node: constant folding, cancellation with lossless
// FP_ROUND, FP16_TO_FP merging, and widening of loads to extending loads.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  // Note: requires the op to be natively Legal (not Custom) at the wider type.
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    // Depending on how X's type compares to ours, we either pass X through,
    // re-round it (reusing the original trunc flag), or re-extend it.
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Other users of the original load now see a value-preserving round of
    // the extended load; the "1" flags the fp_round as lossless.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
10655 
10656 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
10657   SDValue N0 = N->getOperand(0);
10658   EVT VT = N->getValueType(0);
10659 
10660   // fold (fceil c1) -> fceil(c1)
10661   if (isConstantFPBuildVectorOrConstantFP(N0))
10662     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
10663 
10664   return SDValue();
10665 }
10666 
10667 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
10668   SDValue N0 = N->getOperand(0);
10669   EVT VT = N->getValueType(0);
10670 
10671   // fold (ftrunc c1) -> ftrunc(c1)
10672   if (isConstantFPBuildVectorOrConstantFP(N0))
10673     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
10674 
10675   return SDValue();
10676 }
10677 
10678 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
10679   SDValue N0 = N->getOperand(0);
10680   EVT VT = N->getValueType(0);
10681 
10682   // fold (ffloor c1) -> ffloor(c1)
10683   if (isConstantFPBuildVectorOrConstantFP(N0))
10684     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
10685 
10686   return SDValue();
10687 }
10688 
// FIXME: FNEG and FABS have a lot in common; refactor.
// Combine an FNEG node: constant folding, free-negation rewriting, integer
// sign-bit XOR via bitcast, and pushing the negation onto an FMUL constant.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the operand can be negated at no extra cost, use that form directly.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      // XOR-ing the sign bit(s) in the integer domain negates the FP value.
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only after legalization, and only if the negated constant is itself
      // representable legally (as an FP immediate or a legal ConstantFP).
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}
10747 
10748 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
10749   SDValue N0 = N->getOperand(0);
10750   SDValue N1 = N->getOperand(1);
10751   EVT VT = N->getValueType(0);
10752   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10753   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10754 
10755   if (N0CFP && N1CFP) {
10756     const APFloat &C0 = N0CFP->getValueAPF();
10757     const APFloat &C1 = N1CFP->getValueAPF();
10758     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
10759   }
10760 
10761   // Canonicalize to constant on RHS.
10762   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10763      !isConstantFPBuildVectorOrConstantFP(N1))
10764     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
10765 
10766   return SDValue();
10767 }
10768 
10769 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
10770   SDValue N0 = N->getOperand(0);
10771   SDValue N1 = N->getOperand(1);
10772   EVT VT = N->getValueType(0);
10773   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10774   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10775 
10776   if (N0CFP && N1CFP) {
10777     const APFloat &C0 = N0CFP->getValueAPF();
10778     const APFloat &C1 = N1CFP->getValueAPF();
10779     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
10780   }
10781 
10782   // Canonicalize to constant on RHS.
10783   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10784      !isConstantFPBuildVectorOrConstantFP(N1))
10785     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
10786 
10787   return SDValue();
10788 }
10789 
10790 SDValue DAGCombiner::visitFABS(SDNode *N) {
10791   SDValue N0 = N->getOperand(0);
10792   EVT VT = N->getValueType(0);
10793 
10794   // fold (fabs c1) -> fabs(c1)
10795   if (isConstantFPBuildVectorOrConstantFP(N0))
10796     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10797 
10798   // fold (fabs (fabs x)) -> (fabs x)
10799   if (N0.getOpcode() == ISD::FABS)
10800     return N->getOperand(0);
10801 
10802   // fold (fabs (fneg x)) -> (fabs x)
10803   // fold (fabs (fcopysign x, y)) -> (fabs x)
10804   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
10805     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
10806 
10807   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
10808   // constant pool values.
10809   if (!TLI.isFAbsFree(VT) &&
10810       N0.getOpcode() == ISD::BITCAST &&
10811       N0.getNode()->hasOneUse()) {
10812     SDValue Int = N0.getOperand(0);
10813     EVT IntVT = Int.getValueType();
10814     if (IntVT.isInteger() && !IntVT.isVector()) {
10815       APInt SignMask;
10816       if (N0.getValueType().isVector()) {
10817         // For a vector, get a mask such as 0x7f... per scalar element
10818         // and splat it.
10819         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
10820         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10821       } else {
10822         // For a scalar, just generate 0x7f...
10823         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
10824       }
10825       SDLoc DL(N0);
10826       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
10827                         DAG.getConstant(SignMask, DL, IntVT));
10828       AddToWorklist(Int.getNode());
10829       return DAG.getBitcast(N->getValueType(0), Int);
10830     }
10831   }
10832 
10833   return SDValue();
10834 }
10835 
10836 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
10837   SDValue Chain = N->getOperand(0);
10838   SDValue N1 = N->getOperand(1);
10839   SDValue N2 = N->getOperand(2);
10840 
10841   // If N is a constant we could fold this into a fallthrough or unconditional
10842   // branch. However that doesn't happen very often in normal code, because
10843   // Instcombine/SimplifyCFG should have handled the available opportunities.
10844   // If we did this folding here, it would be necessary to update the
10845   // MachineBasicBlock CFG, which is awkward.
10846 
10847   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
10848   // on the target.
10849   if (N1.getOpcode() == ISD::SETCC &&
10850       TLI.isOperationLegalOrCustom(ISD::BR_CC,
10851                                    N1.getOperand(0).getValueType())) {
10852     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
10853                        Chain, N1.getOperand(2),
10854                        N1.getOperand(0), N1.getOperand(1), N2);
10855   }
10856 
10857   if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
10858       ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
10859        (N1.getOperand(0).hasOneUse() &&
10860         N1.getOperand(0).getOpcode() == ISD::SRL))) {
10861     SDNode *Trunc = nullptr;
10862     if (N1.getOpcode() == ISD::TRUNCATE) {
10863       // Look pass the truncate.
10864       Trunc = N1.getNode();
10865       N1 = N1.getOperand(0);
10866     }
10867 
10868     // Match this pattern so that we can generate simpler code:
10869     //
10870     //   %a = ...
10871     //   %b = and i32 %a, 2
10872     //   %c = srl i32 %b, 1
10873     //   brcond i32 %c ...
10874     //
10875     // into
10876     //
10877     //   %a = ...
10878     //   %b = and i32 %a, 2
10879     //   %c = setcc eq %b, 0
10880     //   brcond %c ...
10881     //
10882     // This applies only when the AND constant value has one bit set and the
10883     // SRL constant is equal to the log2 of the AND constant. The back-end is
10884     // smart enough to convert the result into a TEST/JMP sequence.
10885     SDValue Op0 = N1.getOperand(0);
10886     SDValue Op1 = N1.getOperand(1);
10887 
10888     if (Op0.getOpcode() == ISD::AND &&
10889         Op1.getOpcode() == ISD::Constant) {
10890       SDValue AndOp1 = Op0.getOperand(1);
10891 
10892       if (AndOp1.getOpcode() == ISD::Constant) {
10893         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
10894 
10895         if (AndConst.isPowerOf2() &&
10896             cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
10897           SDLoc DL(N);
10898           SDValue SetCC =
10899             DAG.getSetCC(DL,
10900                          getSetCCResultType(Op0.getValueType()),
10901                          Op0, DAG.getConstant(0, DL, Op0.getValueType()),
10902                          ISD::SETNE);
10903 
10904           SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
10905                                           MVT::Other, Chain, SetCC, N2);
10906           // Don't add the new BRCond into the worklist or else SimplifySelectCC
10907           // will convert it back to (X & C1) >> C2.
10908           CombineTo(N, NewBRCond, false);
10909           // Truncate is dead.
10910           if (Trunc)
10911             deleteAndRecombine(Trunc);
10912           // Replace the uses of SRL with SETCC
10913           WorklistRemover DeadNodes(*this);
10914           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
10915           deleteAndRecombine(N1.getNode());
10916           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10917         }
10918       }
10919     }
10920 
10921     if (Trunc)
10922       // Restore N1 if the above transformation doesn't match.
10923       N1 = N->getOperand(1);
10924   }
10925 
10926   // Transform br(xor(x, y)) -> br(x != y)
10927   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
10928   if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
10929     SDNode *TheXor = N1.getNode();
10930     SDValue Op0 = TheXor->getOperand(0);
10931     SDValue Op1 = TheXor->getOperand(1);
10932     if (Op0.getOpcode() == Op1.getOpcode()) {
10933       // Avoid missing important xor optimizations.
10934       if (SDValue Tmp = visitXOR(TheXor)) {
10935         if (Tmp.getNode() != TheXor) {
10936           DEBUG(dbgs() << "\nReplacing.8 ";
10937                 TheXor->dump(&DAG);
10938                 dbgs() << "\nWith: ";
10939                 Tmp.getNode()->dump(&DAG);
10940                 dbgs() << '\n');
10941           WorklistRemover DeadNodes(*this);
10942           DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
10943           deleteAndRecombine(TheXor);
10944           return DAG.getNode(ISD::BRCOND, SDLoc(N),
10945                              MVT::Other, Chain, Tmp, N2);
10946         }
10947 
10948         // visitXOR has changed XOR's operands or replaced the XOR completely,
10949         // bail out.
10950         return SDValue(N, 0);
10951       }
10952     }
10953 
10954     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
10955       bool Equal = false;
10956       if (isOneConstant(Op0) && Op0.hasOneUse() &&
10957           Op0.getOpcode() == ISD::XOR) {
10958         TheXor = Op0.getNode();
10959         Equal = true;
10960       }
10961 
10962       EVT SetCCVT = N1.getValueType();
10963       if (LegalTypes)
10964         SetCCVT = getSetCCResultType(SetCCVT);
10965       SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
10966                                    SetCCVT,
10967                                    Op0, Op1,
10968                                    Equal ? ISD::SETEQ : ISD::SETNE);
10969       // Replace the uses of XOR with SETCC
10970       WorklistRemover DeadNodes(*this);
10971       DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
10972       deleteAndRecombine(N1.getNode());
10973       return DAG.getNode(ISD::BRCOND, SDLoc(N),
10974                          MVT::Other, Chain, SetCC, N2);
10975     }
10976   }
10977 
10978   return SDValue();
10979 }
10980 
10981 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
10982 //
10983 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
10984   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
10985   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
10986 
10987   // If N is a constant we could fold this into a fallthrough or unconditional
10988   // branch. However that doesn't happen very often in normal code, because
10989   // Instcombine/SimplifyCFG should have handled the available opportunities.
10990   // If we did this folding here, it would be necessary to update the
10991   // MachineBasicBlock CFG, which is awkward.
10992 
10993   // Use SimplifySetCC to simplify SETCC's.
10994   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
10995                                CondLHS, CondRHS, CC->get(), SDLoc(N),
10996                                false);
10997   if (Simp.getNode()) AddToWorklist(Simp.getNode());
10998 
10999   // fold to a simpler setcc
11000   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
11001     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
11002                        N->getOperand(0), Simp.getOperand(2),
11003                        Simp.getOperand(0), Simp.getOperand(1),
11004                        N->getOperand(4));
11005 
11006   return SDValue();
11007 }
11008 
11009 /// Return true if 'Use' is a load or a store that uses N as its base pointer
11010 /// and that N may be folded in the load / store addressing mode.
11011 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
11012                                     SelectionDAG &DAG,
11013                                     const TargetLowering &TLI) {
11014   EVT VT;
11015   unsigned AS;
11016 
11017   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
11018     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
11019       return false;
11020     VT = LD->getMemoryVT();
11021     AS = LD->getAddressSpace();
11022   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
11023     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
11024       return false;
11025     VT = ST->getMemoryVT();
11026     AS = ST->getAddressSpace();
11027   } else
11028     return false;
11029 
11030   TargetLowering::AddrMode AM;
11031   if (N->getOpcode() == ISD::ADD) {
11032     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11033     if (Offset)
11034       // [reg +/- imm]
11035       AM.BaseOffs = Offset->getSExtValue();
11036     else
11037       // [reg +/- reg]
11038       AM.Scale = 1;
11039   } else if (N->getOpcode() == ISD::SUB) {
11040     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11041     if (Offset)
11042       // [reg +/- imm]
11043       AM.BaseOffs = -Offset->getSExtValue();
11044     else
11045       // [reg +/- reg]
11046       AM.Scale = 1;
11047   } else
11048     return false;
11049 
11050   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
11051                                    VT.getTypeForEVT(*DAG.getContext()), AS);
11052 }
11053 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
/// \returns true if \p N was replaced by a new pre-indexed node.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Indexed nodes are only introduced once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Dispatch on load vs. store: collect the memory VT and base pointer, and
  // require that the target supports a pre-indexed form for this memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create a indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.  Worklist is seeded with N so the
  // helper answers "is this node a predecessor of N?".
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Skip uses that are predecessors of N; rewriting those to use the
      // indexed node's result would create a cycle.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Only a constant add/sub of a matching type can be rewritten; any
      // other kind of use forces us to keep the original base pointer, so
      // give up on rewriting the other uses entirely.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Undo the earlier swap so BasePtr/Offset are in the order the target
  // produced them when the indexed node is built below.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    // Check #3: folding Ptr into N would create a cycle through this use.
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  // Check #4: if every other use could fold Ptr into its own addressing
  // mode anyway, the pre-indexed form buys nothing.
  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    // Redirect the old load's value (result 0) and chain (result 1); the
    // indexed load's chain is its result 2.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    // Redirect the old store's chain; the indexed store's chain is result 1.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Re-apply the swap so the OtherUses rewrite below sees BasePtr/Offset in
  // the same orientation that was used when OtherUses was collected.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    // Signs: -1 when the corresponding operand is subtracted.
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}
11281 
/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
/// \returns true if \p N was replaced by a new post-indexed node.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed nodes are only introduced once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Dispatch on load vs. store: collect the memory VT and base pointer, and
  // require that the target supports a post-indexed form for this memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // With only one use of the pointer there is no separate add/sub to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look among the other uses of the base pointer for an add/sub the target
  // can turn into a post-increment/decrement.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create a indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mmode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Don't try to fold a frame index or register base pointer.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          // Redirect the old load's value (result 0) and chain (result 1);
          // the indexed load's chain is its result 2.
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          // Redirect the old store's chain; the indexed store's chain is
          // result 1.
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
11401 
11402 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
11403 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
11404   ISD::MemIndexedMode AM = LD->getAddressingMode();
11405   assert(AM != ISD::UNINDEXED);
11406   SDValue BP = LD->getOperand(1);
11407   SDValue Inc = LD->getOperand(2);
11408 
11409   // Some backends use TargetConstants for load offsets, but don't expect
11410   // TargetConstants in general ADD nodes. We can convert these constants into
11411   // regular Constants (if the constant is not opaque).
11412   assert((Inc.getOpcode() != ISD::TargetConstant ||
11413           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
11414          "Cannot split out indexing using opaque target constants");
11415   if (Inc.getOpcode() == ISD::TargetConstant) {
11416     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
11417     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
11418                           ConstInc->getValueType(0));
11419   }
11420 
11421   unsigned Opc =
11422       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
11423   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
11424 }
11425 
/// Combine a LOAD node: delete dead loads, forward stored values, improve
/// alignment, find a better chain, and try indexed or sliced forms.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          // The updated-base result is still used: materialize the pointer
          // arithmetic as a standalone add/sub so the load can be deleted.
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (OptLevel != CodeGenOpt::None &&
      ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, PrevST->getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        // Rebuild the load with the stronger alignment.
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        if (NewLoad.getNode() != N)
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
11561 
11562 namespace {
11563 /// \brief Helper structure used to slice a load in smaller loads.
11564 /// Basically a slice is obtained from the following sequence:
11565 /// Origin = load Ty1, Base
11566 /// Shift = srl Ty1 Origin, CstTy Amount
11567 /// Inst = trunc Shift to Ty2
11568 ///
11569 /// Then, it will be rewritten into:
11570 /// Slice = load SliceTy, Base + SliceOffset
11571 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
11572 ///
11573 /// SliceTy is deduced from the number of bits that are actually used to
11574 /// build Inst.
11575 struct LoadedSlice {
11576   /// \brief Helper structure used to compute the cost of a slice.
11577   struct Cost {
11578     /// Are we optimizing for code size.
11579     bool ForCodeSize;
11580     /// Various cost.
11581     unsigned Loads;
11582     unsigned Truncates;
11583     unsigned CrossRegisterBanksCopies;
11584     unsigned ZExts;
11585     unsigned Shift;
11586 
11587     Cost(bool ForCodeSize = false)
11588         : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
11589           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
11590 
11591     /// \brief Get the cost of one isolated slice.
11592     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
11593         : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
11594           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
11595       EVT TruncType = LS.Inst->getValueType(0);
11596       EVT LoadedType = LS.getLoadedType();
11597       if (TruncType != LoadedType &&
11598           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
11599         ZExts = 1;
11600     }
11601 
11602     /// \brief Account for slicing gain in the current cost.
11603     /// Slicing provide a few gains like removing a shift or a
11604     /// truncate. This method allows to grow the cost of the original
11605     /// load with the gain from this slice.
11606     void addSliceGain(const LoadedSlice &LS) {
11607       // Each slice saves a truncate.
11608       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
11609       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
11610                               LS.Inst->getValueType(0)))
11611         ++Truncates;
11612       // If there is a shift amount, this slice gets rid of it.
11613       if (LS.Shift)
11614         ++Shift;
11615       // If this slice can merge a cross register bank copy, account for it.
11616       if (LS.canMergeExpensiveCrossRegisterBankCopy())
11617         ++CrossRegisterBanksCopies;
11618     }
11619 
11620     Cost &operator+=(const Cost &RHS) {
11621       Loads += RHS.Loads;
11622       Truncates += RHS.Truncates;
11623       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
11624       ZExts += RHS.ZExts;
11625       Shift += RHS.Shift;
11626       return *this;
11627     }
11628 
11629     bool operator==(const Cost &RHS) const {
11630       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
11631              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
11632              ZExts == RHS.ZExts && Shift == RHS.Shift;
11633     }
11634 
11635     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
11636 
11637     bool operator<(const Cost &RHS) const {
11638       // Assume cross register banks copies are as expensive as loads.
11639       // FIXME: Do we want some more target hooks?
11640       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
11641       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
11642       // Unless we are optimizing for code size, consider the
11643       // expensive operation first.
11644       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
11645         return ExpensiveOpsLHS < ExpensiveOpsRHS;
11646       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
11647              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
11648     }
11649 
11650     bool operator>(const Cost &RHS) const { return RHS < *this; }
11651 
11652     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
11653 
11654     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
11655   };
11656   // The last instruction that represent the slice. This should be a
11657   // truncate instruction.
11658   SDNode *Inst;
11659   // The original load instruction.
11660   LoadSDNode *Origin;
11661   // The right shift amount in bits from the original load.
11662   unsigned Shift;
11663   // The DAG from which Origin came from.
11664   // This is used to get some contextual information about legal types, etc.
11665   SelectionDAG *DAG;
11666 
  // Construct a slice from its pieces; all default to null/zero so a
  // LoadedSlice can be declared before being filled in.
  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
11670 
11671   /// \brief Get the bits used in a chunk of bits \p BitWidth large.
11672   /// \return Result is \p BitWidth and has used bits set to 1 and
11673   ///         not used bits set to 0.
11674   APInt getUsedBits() const {
11675     // Reproduce the trunc(lshr) sequence:
11676     // - Start from the truncated value.
11677     // - Zero extend to the desired bit width.
11678     // - Shift left.
11679     assert(Origin && "No original load to compare against.");
11680     unsigned BitWidth = Origin->getValueSizeInBits(0);
11681     assert(Inst && "This slice is not bound to an instruction");
11682     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
11683            "Extracted slice is bigger than the whole type!");
11684     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
11685     UsedBits.setAllBits();
11686     UsedBits = UsedBits.zext(BitWidth);
11687     UsedBits <<= Shift;
11688     return UsedBits;
11689   }
11690 
11691   /// \brief Get the size of the slice to be loaded in bytes.
11692   unsigned getLoadedSize() const {
11693     unsigned SliceSize = getUsedBits().countPopulation();
11694     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
11695     return SliceSize / 8;
11696   }
11697 
11698   /// \brief Get the type that will be loaded for this slice.
11699   /// Note: This may not be the final type for the slice.
11700   EVT getLoadedType() const {
11701     assert(DAG && "Missing context");
11702     LLVMContext &Ctxt = *DAG->getContext();
11703     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
11704   }
11705 
11706   /// \brief Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // A non-zero offset within the original load may reduce the alignment
    // that can be guaranteed for the slice.
    // NOTE(review): MinAlign is fed Alignment + Offset rather than Offset
    // alone; since Alignment is a power of two the common alignment comes
    // out the same either way -- confirm before simplifying.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }
11714 
11715   /// \brief Check if this slice can be rewritten with legal operations.
11716   bool isLegal() const {
11717     // An invalid slice is not legal.
11718     if (!Origin || !Inst || !DAG)
11719       return false;
11720 
11721     // Offsets are for indexed load only, we do not handle that.
11722     if (!Origin->getOffset().isUndef())
11723       return false;
11724 
11725     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11726 
11727     // Check that the type is legal.
11728     EVT SliceType = getLoadedType();
11729     if (!TLI.isTypeLegal(SliceType))
11730       return false;
11731 
11732     // Check that the load is legal for this type.
11733     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
11734       return false;
11735 
11736     // Check that the offset can be computed.
11737     // 1. Check its type.
11738     EVT PtrType = Origin->getBasePtr().getValueType();
11739     if (PtrType == MVT::Untyped || PtrType.isExtended())
11740       return false;
11741 
11742     // 2. Check that it fits in the immediate.
11743     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
11744       return false;
11745 
11746     // 3. Check that the computation is legal.
11747     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
11748       return false;
11749 
11750     // Check that the zext is legal if it needs one.
11751     EVT TruncateType = Inst->getValueType(0);
11752     if (TruncateType != SliceType &&
11753         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
11754       return false;
11755 
11756     return true;
11757   }
11758 
11759   /// \brief Get the offset in bytes of this slice in the original chunk of
11760   /// bits.
11761   /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the low (right-shifted-out) bits live at the
    // high addresses, so mirror the offset within the original type.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }
11779 
11780   /// \brief Generate the sequence of instructions to load the slice
11781   /// represented by this object and redirect the uses of this slice to
11782   /// this new sequence of instructions.
11783   /// \pre this->Inst && this->Origin are valid Instructions and this
11784   /// object passed the legal check: LoadedSlice::isLegal returned true.
11785   /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    // Reuse the original load's chain and memory-operand flags; the pointer
    // info is shifted by the slice's byte offset.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset),
                     getAlignment(), Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }
11817 
11818   /// \brief Check if this slice can be merged with an expensive cross register
11819   /// bank copy. E.g.,
11820   /// i = load i32
11821   /// f = bitcast i32 i to float
11822   bool canMergeExpensiveCrossRegisterBankCopy() const {
11823     if (!Inst || !Inst->hasOneUse())
11824       return false;
11825     SDNode *Use = *Inst->use_begin();
11826     if (Use->getOpcode() != ISD::BITCAST)
11827       return false;
11828     assert(DAG && "Missing context");
11829     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11830     EVT ResVT = Use->getValueType(0);
11831     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
11832     const TargetRegisterClass *ArgRC =
11833         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
11834     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
11835       return false;
11836 
11837     // At this point, we know that we perform a cross-register-bank copy.
11838     // Check if it is expensive.
11839     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
11840     // Assume bitcasts are cheap, unless both register classes do not
11841     // explicitly share a common sub class.
11842     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
11843       return false;
11844 
11845     // Check if it will be merged with the load.
11846     // 1. Check the alignment constraint.
11847     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
11848         ResVT.getTypeForEVT(*DAG->getContext()));
11849 
11850     if (RequiredAlignment > getAlignment())
11851       return false;
11852 
11853     // 2. Check that the load is a legal operation for that type.
11854     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
11855       return false;
11856 
11857     // 3. Check that we do not have a zext in the way.
11858     if (Inst->getValueType(0) != getLoadedType())
11859       return false;
11860 
11861     return true;
11862   }
11863 };
11864 }
11865 
11866 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
11867 /// \p UsedBits looks like 0..0 1..1 0..0.
11868 static bool areUsedBitsDense(const APInt &UsedBits) {
11869   // If all the bits are one, this is dense!
11870   if (UsedBits.isAllOnesValue())
11871     return true;
11872 
11873   // Get rid of the unused bits on the right.
11874   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
11875   // Get rid of the unused bits on the left.
11876   if (NarrowedUsedBits.countLeadingZeros())
11877     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
11878   // Check that the chunk of bits is completely used.
11879   return NarrowedUsedBits.isAllOnesValue();
11880 }
11881 
11882 /// \brief Check whether or not \p First and \p Second are next to each other
11883 /// in memory. This means that there is no hole between the bits loaded
11884 /// by \p First and the bits loaded by \p Second.
11885 static bool areSlicesNextToEachOther(const LoadedSlice &First,
11886                                      const LoadedSlice &Second) {
11887   assert(First.Origin == Second.Origin && First.Origin &&
11888          "Unable to match different memory origins.");
11889   APInt UsedBits = First.getUsedBits();
11890   assert((UsedBits & Second.getUsedBits()) == 0 &&
11891          "Slices are not supposed to overlap.");
11892   UsedBits |= Second.getUsedBits();
11893   return areUsedBitsDense(UsedBits);
11894 }
11895 
/// \brief Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
11898 /// \pre \p GlobalLSCost should account for at least as many loads as
11899 /// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // Walk adjacent slice pairs; the increment clause slides the window by
  // promoting Second to First on every iteration.
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                                                           First = Second) {

    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // A paired load replaces two loads with one, so give one load back.
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
11957 
11958 /// \brief Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
11962 ///
11963 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
11964 /// the elements themselves.
11965 ///
11966 /// FIXME: When the cost model will be mature enough, we can relax
11967 /// constraints (1) and (2).
11968 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11969                                 const APInt &UsedBits, bool ForCodeSize) {
11970   unsigned NumberOfSlices = LoadedSlices.size();
11971   if (StressLoadSlicing)
11972     return NumberOfSlices > 1;
11973 
11974   // Check (1).
11975   if (NumberOfSlices != 2)
11976     return false;
11977 
11978   // Check (2).
11979   if (!areUsedBitsDense(UsedBits))
11980     return false;
11981 
11982   // Check (3).
11983   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
11984   // The original code has one big load.
11985   OrigCost.Loads = 1;
11986   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
11987     const LoadedSlice &LS = LoadedSlices[CurrSlice];
11988     // Accumulate the cost of all the slices.
11989     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
11990     GlobalSlicingCost += SliceCost;
11991 
11992     // Account as cost in the original configuration the gain obtained
11993     // with the current slices.
11994     OrigCost.addSliceGain(LS);
11995   }
11996 
11997   // If the target supports paired load, adjust the cost accordingly.
11998   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
11999   return OrigCost > GlobalSlicingCost;
12000 }
12001 
/// \brief If the given load, \p N, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
12008 bool DAGCombiner::SliceUpLoad(SDNode *N) {
12009   if (Level < AfterLegalizeDAG)
12010     return false;
12011 
12012   LoadSDNode *LD = cast<LoadSDNode>(N);
12013   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
12014       !LD->getValueType(0).isInteger())
12015     return false;
12016 
12017   // Keep track of already used bits to detect overlapping values.
12018   // In that case, we will just abort the transformation.
12019   APInt UsedBits(LD->getValueSizeInBits(0), 0);
12020 
12021   SmallVector<LoadedSlice, 4> LoadedSlices;
12022 
12023   // Check if this load is used as several smaller chunks of bits.
12024   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
12025   // of computation for each trunc.
12026   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
12027        UI != UIEnd; ++UI) {
12028     // Skip the uses of the chain.
12029     if (UI.getUse().getResNo() != 0)
12030       continue;
12031 
12032     SDNode *User = *UI;
12033     unsigned Shift = 0;
12034 
12035     // Check if this is a trunc(lshr).
12036     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
12037         isa<ConstantSDNode>(User->getOperand(1))) {
12038       Shift = User->getConstantOperandVal(1);
12039       User = *User->use_begin();
12040     }
12041 
12042     // At this point, User is a Truncate, iff we encountered, trunc or
12043     // trunc(lshr).
12044     if (User->getOpcode() != ISD::TRUNCATE)
12045       return false;
12046 
12047     // The width of the type must be a power of 2 and greater than 8-bits.
12048     // Otherwise the load cannot be represented in LLVM IR.
12049     // Moreover, if we shifted with a non-8-bits multiple, the slice
12050     // will be across several bytes. We do not support that.
12051     unsigned Width = User->getValueSizeInBits(0);
12052     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
12053       return 0;
12054 
12055     // Build the slice for this chain of computations.
12056     LoadedSlice LS(User, LD, Shift, &DAG);
12057     APInt CurrentUsedBits = LS.getUsedBits();
12058 
12059     // Check if this slice overlaps with another.
12060     if ((CurrentUsedBits & UsedBits) != 0)
12061       return false;
12062     // Update the bits used globally.
12063     UsedBits |= CurrentUsedBits;
12064 
12065     // Check if the new slice would be legal.
12066     if (!LS.isLegal())
12067       return false;
12068 
12069     // Record the slice.
12070     LoadedSlices.push_back(LS);
12071   }
12072 
12073   // Abort slicing if it does not seem to be profitable.
12074   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
12075     return false;
12076 
12077   ++SlicedLoads;
12078 
12079   // Rewrite each chain to use an independent load.
12080   // By construction, each chain can be represented by a unique load.
12081 
12082   // Prepare the argument for the new token factor for all the slices.
12083   SmallVector<SDValue, 8> ArgChains;
12084   for (SmallVectorImpl<LoadedSlice>::const_iterator
12085            LSIt = LoadedSlices.begin(),
12086            LSItEnd = LoadedSlices.end();
12087        LSIt != LSItEnd; ++LSIt) {
12088     SDValue SliceInst = LSIt->loadSlice();
12089     CombineTo(LSIt->Inst, SliceInst, true);
12090     if (SliceInst.getOpcode() != ISD::LOAD)
12091       SliceInst = SliceInst.getOperand(0);
12092     assert(SliceInst->getOpcode() == ISD::LOAD &&
12093            "It takes more than a zext to get to the loaded slice!!");
12094     ArgChains.push_back(SliceInst.getValue(1));
12095   }
12096 
12097   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
12098                               ArgChains);
12099   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
12100   AddToWorklist(Chain.getNode());
12101   return true;
12102 }
12103 
12104 /// Check to see if V is (and load (ptr), imm), where the load is having
12105 /// specific bytes cleared out.  If so, return the byte size being masked out
12106 /// and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // Result is (0, 0) on failure; otherwise (masked byte count, byte shift).
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    // Look for the load among the token factor's operands.
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  // Width of the run of masked-out (zero) bits, in bytes.
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}
12177 
12178 
12179 /// Check to see if IVal is something that provides a value as specified by
12180 /// MaskInfo. If so, replace the specified store with a narrower store of
12181 /// truncated IVal.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  // MaskInfo is (masked byte count, byte shift) as produced by
  // CheckForMaskedLoad. Returns the new store, or null on failure.
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On little-endian targets the shifted-out low bytes come first in
  // memory; on big-endian targets they sit at the end of the stored value.
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign)
      .getNode();
}
12238 
12239 
12240 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
12241 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
12242 /// narrowing the load and store if it would end up being a win for performance
12243 /// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only plain (non-truncating) scalar integer stores of a single-use value
  // are candidates.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // Otherwise, handle (or/xor/and (load P), cst) stored back to P.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    // The load must read through the same pointer and address space the
    // store writes through.
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, invert the mask so that, for all three opcodes, the set bits
    // of Imm are exactly the bits the operation may change.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // A no-op (or all-bits) operation is not worth narrowing.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only narrow when every changed bit fits in the NewBW-wide window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // Reject the transformation if the narrow access would be
      // under-aligned for its type.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrow load / op / store sequence.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Redirect chain users of the old load to the new narrow load.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
12365 
12366 /// For a given floating point load / store pair, if the load value isn't used
12367 /// by any other operations, then consider transforming the pair to integer
12368 /// load / store operations if the target deems the transformation profitable.
12369 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
12370   StoreSDNode *ST  = cast<StoreSDNode>(N);
12371   SDValue Chain = ST->getChain();
12372   SDValue Value = ST->getValue();
12373   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
12374       Value.hasOneUse() &&
12375       Chain == SDValue(Value.getNode(), 1)) {
12376     LoadSDNode *LD = cast<LoadSDNode>(Value);
12377     EVT VT = LD->getMemoryVT();
12378     if (!VT.isFloatingPoint() ||
12379         VT != ST->getMemoryVT() ||
12380         LD->isNonTemporal() ||
12381         ST->isNonTemporal() ||
12382         LD->getPointerInfo().getAddrSpace() != 0 ||
12383         ST->getPointerInfo().getAddrSpace() != 0)
12384       return SDValue();
12385 
12386     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
12387     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
12388         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
12389         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
12390         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
12391       return SDValue();
12392 
12393     unsigned LDAlign = LD->getAlignment();
12394     unsigned STAlign = ST->getAlignment();
12395     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
12396     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
12397     if (LDAlign < ABIAlign || STAlign < ABIAlign)
12398       return SDValue();
12399 
12400     SDValue NewLD =
12401         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
12402                     LD->getPointerInfo(), LDAlign);
12403 
12404     SDValue NewST =
12405         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
12406                      ST->getPointerInfo(), STAlign);
12407 
12408     AddToWorklist(NewLD.getNode());
12409     AddToWorklist(NewST.getNode());
12410     WorklistRemover DeadNodes(*this);
12411     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
12412     ++LdStFP2Int;
12413     return NewST;
12414   }
12415 
12416   return SDValue();
12417 }
12418 
12419 // This is a helper function for visitMUL to check the profitability
12420 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
12421 // MulNode is the original multiply, AddNode is (add x, c1),
12422 // and ConstNode is c2.
12423 //
12424 // If the (add x, c1) has multiple uses, we could increase
12425 // the number of adds if we make this transformation.
12426 // It would only be worth doing this if we can remove a
12427 // multiply in the process. Check for that here.
12428 // To illustrate:
12429 //     (A + c1) * c3
12430 //     (A + c2) * c3
12431 // We're checking for cases where we have common "c3 * A" expressions.
12432 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
12433                                               SDValue &AddNode,
12434                                               SDValue &ConstNode) {
12435   APInt Val;
12436 
12437   // If the add only has one use, this would be OK to do.
12438   if (AddNode.getNode()->hasOneUse())
12439     return true;
12440 
12441   // Walk all the users of the constant with which we're multiplying.
12442   for (SDNode *Use : ConstNode->uses()) {
12443 
12444     if (Use == MulNode) // This use is the one we're on right now. Skip it.
12445       continue;
12446 
12447     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
12448       SDNode *OtherOp;
12449       SDNode *MulVar = AddNode.getOperand(0).getNode();
12450 
12451       // OtherOp is what we're multiplying against the constant.
12452       if (Use->getOperand(0) == ConstNode)
12453         OtherOp = Use->getOperand(1).getNode();
12454       else
12455         OtherOp = Use->getOperand(0).getNode();
12456 
12457       // Check to see if multiply is with the same operand of our "add".
12458       //
12459       //     ConstNode  = CONST
12460       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
12461       //     ...
12462       //     AddNode  = (A + c1)  <-- MulVar is A.
12463       //         = AddNode * ConstNode   <-- current visiting instruction.
12464       //
12465       // If we make this transformation, we will have a common
12466       // multiply (ConstNode * A) that we can save.
12467       if (OtherOp == MulVar)
12468         return true;
12469 
12470       // Now check to see if a future expansion will give us a common
12471       // multiply.
12472       //
12473       //     ConstNode  = CONST
12474       //     AddNode    = (A + c1)
12475       //     ...   = AddNode * ConstNode <-- current visiting instruction.
12476       //     ...
12477       //     OtherOp = (A + c2)
12478       //     Use     = OtherOp * ConstNode <-- visiting Use.
12479       //
12480       // If we make this transformation, we will have a common
12481       // multiply (CONST * A) after we also do the same transformation
12482       // to the "t2" instruction.
12483       if (OtherOp->getOpcode() == ISD::ADD &&
12484           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
12485           OtherOp->getOperand(0).getNode() == MulVar)
12486         return true;
12487     }
12488   }
12489 
12490   // Didn't find a case where this would be profitable.
12491   return false;
12492 }
12493 
12494 static SDValue peekThroughBitcast(SDValue V) {
12495   while (V.getOpcode() == ISD::BITCAST)
12496     V = V.getOperand(0);
12497   return V;
12498 }
12499 
12500 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
12501                                          unsigned NumStores) {
12502   SmallVector<SDValue, 8> Chains;
12503   SmallPtrSet<const SDNode *, 8> Visited;
12504   SDLoc StoreDL(StoreNodes[0].MemNode);
12505 
12506   for (unsigned i = 0; i < NumStores; ++i) {
12507     Visited.insert(StoreNodes[i].MemNode);
12508   }
12509 
12510   // don't include nodes that are children
12511   for (unsigned i = 0; i < NumStores; ++i) {
12512     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
12513       Chains.push_back(StoreNodes[i].MemNode->getChain());
12514   }
12515 
12516   assert(Chains.size() > 0 && "Chain should have generated a chain");
12517   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
12518 }
12519 
/// Replace the first \p NumStores entries of \p StoreNodes with one wide
/// store covering all of them.
/// \param StoreNodes consecutive stores, ordered by offset from a common base.
/// \param MemVT the memory type of each individual store.
/// \param NumStores number of leading entries of StoreNodes to merge.
/// \param IsConstantSrc true if every stored value is a constant; otherwise
///        the values are vector extracts.
/// \param UseVector build the merged value as a vector instead of a wide
///        integer.
/// \param UseTrunc emit a truncating store of the promoted integer value.
/// \returns true on success; the original stores are replaced via CombineTo.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  // Debug location for every node we create, taken from the first store.
  SDLoc DL(StoreNodes[0].MemNode);

  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
  unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  // Pick the type of the merged store: a wide vector when requested,
  // otherwise an integer spanning all merged elements.
  EVT StoreTy;
  if (UseVector) {
    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      // Gather the constant operands, coercing each to MemVT so they can be
      // assembled with BUILD_VECTOR / CONCAT_VECTORS.
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcast(Val);
          // Deal with constants of wrong size: re-materialize them as an
          // integer constant of the memory width before bitcasting.
          if (ElementSizeBytes * 8 != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (auto *CFP = dyn_cast<ConstantFPSDNode>(Val))
              Val = DAG.getConstant(
                  CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(
                      8 * ElementSizeBytes),
                  SDLoc(CFP), IntMemVT);
            else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              Val = DAG.getConstant(
                  C->getAPIntValue().zextOrTrunc(8 * ElementSizeBytes),
                  SDLoc(C), IntMemVT);
          }
          // Bitcast the (possibly resized) constant back to the memory type.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcast(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          SDValue Vec = Val.getOperand(0);
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Vec.getValueType()) {
            unsigned Elts = Vec.getValueType().getSizeInBits() /
                            MemVTScalarTy.getSizeInBits();
            EVT NewVecTy =
                EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
            Vec = DAG.getBitcast(NewVecTy, Vec);
          }
          // Re-extract with the opcode matching MemVT's shape, reusing the
          // original extraction index.
          auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
                                        : ISD::EXTRACT_VECTOR_ELT;
          Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs. Elements are packed most-significant-first, so on
    // little-endian targets the highest-offset store supplies the most
    // significant bits of the wide integer.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      StoreInt <<= ElementSizeBytes * 8;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    // NOTE(review): unlike the trunc path below, this does not forward the
    // first store's MemOperand flags — confirm that is intended.
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    // Widen the constant to the promoted integer type, then store only the
    // original StoreTy-sized memory value.
    EVT LegalizedStoredValueTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValueTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}
12670 
/// Populate \p StoreNodes with stores that are potential merge candidates for
/// \p St: plain stores sharing St's base pointer whose stored values come
/// from the same kind of source (constant, load, or vector extract). Each
/// entry records the store and its byte offset from St's base.
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
  EVT MemVT = St->getMemoryVT();

  SDValue Val = peekThroughBitcast(St->getValue());
  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return;

  // Classify the source of the stored value; candidates must match this kind.
  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
  EVT LoadVT;
  if (IsLoadSrc) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
  }
  // Returns true if Other is a mergeable candidate; on success sets Ptr to
  // its decomposed address and Offset to its byte distance from BasePtr.
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    if (Other->isVolatile() || Other->isIndexed())
      return false;
    SDValue Val = peekThroughBitcast(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
        auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (IsConstantSrc) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
        return false;
    }
    if (IsExtractVecSrc) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(Val.getValueType()))
        return false;
      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };
  // We are looking for a root node which is an ancestor to all mergable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2. or Store3 where the root is not a load
  // which always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1   Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  SDNode *RootNode = (St->getChain()).getNode();

  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    // St is chained to a load: climb to the load's chain, then look down
    // through sibling loads for stores hanging off them.
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    // Otherwise scan the root's chain users directly for candidate stores.
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}
12782 
12783 // We need to check that merging these stores does not cause a loop
12784 // in the DAG. Any store candidate may depend on another candidate
12785 // indirectly through its operand (we already consider dependencies
12786 // through the chain). Check in parallel by searching up from
12787 // non-chain operands of candidates.
12788 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
12789     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
12790   SmallPtrSet<const SDNode *, 16> Visited;
12791   SmallVector<const SDNode *, 8> Worklist;
12792   // search ops of store candidates
12793   for (unsigned i = 0; i < NumStores; ++i) {
12794     SDNode *n = StoreNodes[i].MemNode;
12795     // Potential loops may happen only through non-chain operands
12796     for (unsigned j = 1; j < n->getNumOperands(); ++j)
12797       Worklist.push_back(n->getOperand(j).getNode());
12798   }
12799   // search through DAG. We can stop early if we find a storenode
12800   for (unsigned i = 0; i < NumStores; ++i) {
12801     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
12802       return false;
12803   }
12804   return true;
12805 }
12806 
12807 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
12808   if (OptLevel == CodeGenOpt::None)
12809     return false;
12810 
12811   EVT MemVT = St->getMemoryVT();
12812   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12813   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
12814 
12815   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
12816     return false;
12817 
12818   bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
12819       Attribute::NoImplicitFloat);
12820 
12821   // This function cannot currently deal with non-byte-sized memory sizes.
12822   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
12823     return false;
12824 
12825   if (!MemVT.isSimple())
12826     return false;
12827 
12828   // Perform an early exit check. Do not bother looking at stored values that
12829   // are not constants, loads, or extracted vector elements.
12830   SDValue StoredVal = peekThroughBitcast(St->getValue());
12831   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
12832   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
12833                        isa<ConstantFPSDNode>(StoredVal);
12834   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12835                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
12836 
12837   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
12838     return false;
12839 
12840   SmallVector<MemOpLink, 8> StoreNodes;
12841   // Find potential store merge candidates by searching through chain sub-DAG
12842   getStoreMergeCandidates(St, StoreNodes);
12843 
12844   // Check if there is anything to merge.
12845   if (StoreNodes.size() < 2)
12846     return false;
12847 
12848   // Sort the memory operands according to their distance from the
12849   // base pointer.
12850   std::sort(StoreNodes.begin(), StoreNodes.end(),
12851             [](MemOpLink LHS, MemOpLink RHS) {
12852               return LHS.OffsetFromBase < RHS.OffsetFromBase;
12853             });
12854 
12855   // Store Merge attempts to merge the lowest stores. This generally
12856   // works out as if successful, as the remaining stores are checked
12857   // after the first collection of stores is merged. However, in the
12858   // case that a non-mergeable store is found first, e.g., {p[-2],
12859   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
12860   // mergeable cases. To prevent this, we prune such stores from the
12861   // front of StoreNodes here.
12862 
12863   bool RV = false;
12864   while (StoreNodes.size() > 1) {
12865     unsigned StartIdx = 0;
12866     while ((StartIdx + 1 < StoreNodes.size()) &&
12867            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
12868                StoreNodes[StartIdx + 1].OffsetFromBase)
12869       ++StartIdx;
12870 
12871     // Bail if we don't have enough candidates to merge.
12872     if (StartIdx + 1 >= StoreNodes.size())
12873       return RV;
12874 
12875     if (StartIdx)
12876       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
12877 
12878     // Scan the memory operations on the chain and find the first
12879     // non-consecutive store memory address.
12880     unsigned NumConsecutiveStores = 1;
12881     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
12882     // Check that the addresses are consecutive starting from the second
12883     // element in the list of stores.
12884     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
12885       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
12886       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12887         break;
12888       NumConsecutiveStores = i + 1;
12889     }
12890 
12891     if (NumConsecutiveStores < 2) {
12892       StoreNodes.erase(StoreNodes.begin(),
12893                        StoreNodes.begin() + NumConsecutiveStores);
12894       continue;
12895     }
12896 
12897     // Check that we can merge these candidates without causing a cycle
12898     if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
12899                                                   NumConsecutiveStores)) {
12900       StoreNodes.erase(StoreNodes.begin(),
12901                        StoreNodes.begin() + NumConsecutiveStores);
12902       continue;
12903     }
12904 
12905     // The node with the lowest store address.
12906     LLVMContext &Context = *DAG.getContext();
12907     const DataLayout &DL = DAG.getDataLayout();
12908 
12909     // Store the constants into memory as one consecutive store.
12910     if (IsConstantSrc) {
12911       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12912       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12913       unsigned FirstStoreAlign = FirstInChain->getAlignment();
12914       unsigned LastLegalType = 1;
12915       unsigned LastLegalVectorType = 1;
12916       bool LastIntegerTrunc = false;
12917       bool NonZero = false;
12918       unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
12919       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12920         StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
12921         SDValue StoredVal = ST->getValue();
12922         bool IsElementZero = false;
12923         if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
12924           IsElementZero = C->isNullValue();
12925         else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
12926           IsElementZero = C->getConstantFPValue()->isNullValue();
12927         if (IsElementZero) {
12928           if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
12929             FirstZeroAfterNonZero = i;
12930         }
12931         NonZero |= !IsElementZero;
12932 
12933         // Find a legal type for the constant store.
12934         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
12935         EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12936         bool IsFast = false;
12937         if (TLI.isTypeLegal(StoreTy) &&
12938             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
12939             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12940                                    FirstStoreAlign, &IsFast) &&
12941             IsFast) {
12942           LastIntegerTrunc = false;
12943           LastLegalType = i + 1;
12944           // Or check whether a truncstore is legal.
12945         } else if (TLI.getTypeAction(Context, StoreTy) ==
12946                    TargetLowering::TypePromoteInteger) {
12947           EVT LegalizedStoredValueTy =
12948               TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
12949           if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12950               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
12951               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12952                                      FirstStoreAlign, &IsFast) &&
12953               IsFast) {
12954             LastIntegerTrunc = true;
12955             LastLegalType = i + 1;
12956           }
12957         }
12958 
12959         // We only use vectors if the constant is known to be zero or the target
12960         // allows it and the function is not marked with the noimplicitfloat
12961         // attribute.
12962         if ((!NonZero ||
12963              TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
12964             !NoVectors) {
12965           // Find a legal type for the vector store.
12966           unsigned Elts = (i + 1) * NumMemElts;
12967           EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
12968           if (TLI.isTypeLegal(Ty) &&
12969               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
12970               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12971                                      FirstStoreAlign, &IsFast) &&
12972               IsFast)
12973             LastLegalVectorType = i + 1;
12974         }
12975       }
12976 
12977       bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
12978       unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
12979 
12980       // Check if we found a legal integer type that creates a meaningful merge.
12981       if (NumElem < 2) {
12982         // We know that candidate stores are in order and of correct
12983         // shape. While there is no mergeable sequence from the
12984         // beginning one may start later in the sequence. The only
12985         // reason a merge of size N could have failed where another of
12986         // the same size would not have, is if the alignment has
12987         // improved or we've dropped a non-zero value. Drop as many
12988         // candidates as we can here.
12989         unsigned NumSkip = 1;
12990         while (
12991             (NumSkip < NumConsecutiveStores) &&
12992             (NumSkip < FirstZeroAfterNonZero) &&
12993             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
12994           NumSkip++;
12995         }
12996         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
12997         continue;
12998       }
12999 
13000       bool Merged = MergeStoresOfConstantsOrVecElts(
13001           StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
13002       RV |= Merged;
13003 
13004       // Remove merged stores for next iteration.
13005       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13006       continue;
13007     }
13008 
13009     // When extracting multiple vector elements, try to store them
13010     // in one vector store rather than a sequence of scalar stores.
13011     if (IsExtractVecSrc) {
13012       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13013       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13014       unsigned FirstStoreAlign = FirstInChain->getAlignment();
13015       unsigned NumStoresToMerge = 1;
13016       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13017         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13018         SDValue StVal = peekThroughBitcast(St->getValue());
13019         // This restriction could be loosened.
13020         // Bail out if any stored values are not elements extracted from a
13021         // vector. It should be possible to handle mixed sources, but load
13022         // sources need more careful handling (see the block of code below that
13023         // handles consecutive loads).
13024         if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
13025             StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13026           return RV;
13027 
13028         // Find a legal type for the vector store.
13029         unsigned Elts = (i + 1) * NumMemElts;
13030         EVT Ty =
13031             EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
13032         bool IsFast;
13033         if (TLI.isTypeLegal(Ty) &&
13034             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
13035             TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
13036                                    FirstStoreAlign, &IsFast) &&
13037             IsFast)
13038           NumStoresToMerge = i + 1;
13039       }
13040 
13041       // Check if we found a legal integer type that creates a meaningful merge.
13042       if (NumStoresToMerge < 2) {
13043         // We know that candidate stores are in order and of correct
13044         // shape. While there is no mergeable sequence from the
13045         // beginning one may start later in the sequence. The only
13046         // reason a merge of size N could have failed where another of
13047         // the same size would not have, is if the alignment has
13048         // improved. Drop as many candidates as we can here.
13049         unsigned NumSkip = 1;
13050         while ((NumSkip < NumConsecutiveStores) &&
13051                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
13052           NumSkip++;
13053 
13054         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13055         continue;
13056       }
13057 
13058       bool Merged = MergeStoresOfConstantsOrVecElts(
13059           StoreNodes, MemVT, NumStoresToMerge, false, true, false);
13060       if (!Merged) {
13061         StoreNodes.erase(StoreNodes.begin(),
13062                          StoreNodes.begin() + NumStoresToMerge);
13063         continue;
13064       }
13065       // Remove merged stores for next iteration.
13066       StoreNodes.erase(StoreNodes.begin(),
13067                        StoreNodes.begin() + NumStoresToMerge);
13068       RV = true;
13069       continue;
13070     }
13071 
13072     // Below we handle the case of multiple consecutive stores that
13073     // come from multiple consecutive loads. We merge them into a single
13074     // wide load and a single wide store.
13075 
13076     // Look for load nodes which are used by the stored values.
13077     SmallVector<MemOpLink, 8> LoadNodes;
13078 
13079     // Find acceptable loads. Loads need to have the same chain (token factor),
13080     // must not be zext, volatile, indexed, and they must be consecutive.
13081     BaseIndexOffset LdBasePtr;
13082     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13083       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13084       SDValue Val = peekThroughBitcast(St->getValue());
13085       LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
13086       if (!Ld)
13087         break;
13088 
13089       // Loads must only have one use.
13090       if (!Ld->hasNUsesOfValue(1, 0))
13091         break;
13092 
13093       // The memory operands must not be volatile.
13094       if (Ld->isVolatile() || Ld->isIndexed())
13095         break;
13096 
13097       // The stored memory type must be the same.
13098       if (Ld->getMemoryVT() != MemVT)
13099         break;
13100 
13101       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
13102       // If this is not the first ptr that we check.
13103       int64_t LdOffset = 0;
13104       if (LdBasePtr.getBase().getNode()) {
13105         // The base ptr must be the same.
13106         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
13107           break;
13108       } else {
13109         // Check that all other base pointers are the same as this one.
13110         LdBasePtr = LdPtr;
13111       }
13112 
13113       // We found a potential memory operand to merge.
13114       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
13115     }
13116 
13117     if (LoadNodes.size() < 2) {
13118       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
13119       continue;
13120     }
13121 
13122     // If we have load/store pair instructions and we only have two values,
13123     // don't bother merging.
13124     unsigned RequiredAlignment;
13125     if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
13126         StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
13127       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
13128       continue;
13129     }
13130     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13131     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13132     unsigned FirstStoreAlign = FirstInChain->getAlignment();
13133     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
13134     unsigned FirstLoadAS = FirstLoad->getAddressSpace();
13135     unsigned FirstLoadAlign = FirstLoad->getAlignment();
13136 
13137     // Scan the memory operations on the chain and find the first
13138     // non-consecutive load memory address. These variables hold the index in
13139     // the store node array.
13140     unsigned LastConsecutiveLoad = 1;
13141     // This variable refers to the size and not index in the array.
13142     unsigned LastLegalVectorType = 1;
13143     unsigned LastLegalIntegerType = 1;
13144     bool isDereferenceable = true;
13145     bool DoIntegerTruncate = false;
13146     StartAddress = LoadNodes[0].OffsetFromBase;
13147     SDValue FirstChain = FirstLoad->getChain();
13148     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
13149       // All loads must share the same chain.
13150       if (LoadNodes[i].MemNode->getChain() != FirstChain)
13151         break;
13152 
13153       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
13154       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
13155         break;
13156       LastConsecutiveLoad = i;
13157 
13158       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
13159         isDereferenceable = false;
13160 
13161       // Find a legal type for the vector store.
13162       unsigned Elts = (i + 1) * NumMemElts;
13163       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13164 
13165       bool IsFastSt, IsFastLd;
13166       if (TLI.isTypeLegal(StoreTy) &&
13167           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13168           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13169                                  FirstStoreAlign, &IsFastSt) &&
13170           IsFastSt &&
13171           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13172                                  FirstLoadAlign, &IsFastLd) &&
13173           IsFastLd) {
13174         LastLegalVectorType = i + 1;
13175       }
13176 
13177       // Find a legal type for the integer store.
13178       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
13179       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
13180       if (TLI.isTypeLegal(StoreTy) &&
13181           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13182           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13183                                  FirstStoreAlign, &IsFastSt) &&
13184           IsFastSt &&
13185           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13186                                  FirstLoadAlign, &IsFastLd) &&
13187           IsFastLd) {
13188         LastLegalIntegerType = i + 1;
13189         DoIntegerTruncate = false;
13190         // Or check whether a truncstore and extload is legal.
13191       } else if (TLI.getTypeAction(Context, StoreTy) ==
13192                  TargetLowering::TypePromoteInteger) {
13193         EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
13194         if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
13195             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
13196             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
13197                                StoreTy) &&
13198             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
13199                                StoreTy) &&
13200             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
13201             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13202                                    FirstStoreAlign, &IsFastSt) &&
13203             IsFastSt &&
13204             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13205                                    FirstLoadAlign, &IsFastLd) &&
13206             IsFastLd) {
13207           LastLegalIntegerType = i + 1;
13208           DoIntegerTruncate = true;
13209         }
13210       }
13211     }
13212 
13213     // Only use vector types if the vector type is larger than the integer type.
13214     // If they are the same, use integers.
13215     bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
13216     unsigned LastLegalType =
13217         std::max(LastLegalVectorType, LastLegalIntegerType);
13218 
13219     // We add +1 here because the LastXXX variables refer to location while
13220     // the NumElem refers to array/index size.
13221     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
13222     NumElem = std::min(LastLegalType, NumElem);
13223 
13224     if (NumElem < 2) {
13225       // We know that candidate stores are in order and of correct
13226       // shape. While there is no mergeable sequence from the
13227       // beginning one may start later in the sequence. The only
13228       // reason a merge of size N could have failed where another of
13229       // the same size would not have is if the alignment or either
13230       // the load or store has improved. Drop as many candidates as we
13231       // can here.
13232       unsigned NumSkip = 1;
13233       while ((NumSkip < LoadNodes.size()) &&
13234              (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
13235              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
13236         NumSkip++;
13237       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13238       continue;
13239     }
13240 
13241     // Find if it is better to use vectors or integers to load and store
13242     // to memory.
13243     EVT JointMemOpVT;
13244     if (UseVectorTy) {
13245       // Find a legal type for the vector store.
13246       unsigned Elts = NumElem * NumMemElts;
13247       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13248     } else {
13249       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
13250       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
13251     }
13252 
13253     SDLoc LoadDL(LoadNodes[0].MemNode);
13254     SDLoc StoreDL(StoreNodes[0].MemNode);
13255 
13256     // The merged loads are required to have the same incoming chain, so
13257     // using the first's chain is acceptable.
13258 
13259     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
13260     AddToWorklist(NewStoreChain.getNode());
13261 
13262     MachineMemOperand::Flags MMOFlags = isDereferenceable ?
13263                                           MachineMemOperand::MODereferenceable:
13264                                           MachineMemOperand::MONone;
13265 
13266     SDValue NewLoad, NewStore;
13267     if (UseVectorTy || !DoIntegerTruncate) {
13268       NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
13269                             FirstLoad->getBasePtr(),
13270                             FirstLoad->getPointerInfo(), FirstLoadAlign,
13271                             MMOFlags);
13272       NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
13273                               FirstInChain->getBasePtr(),
13274                               FirstInChain->getPointerInfo(), FirstStoreAlign);
13275     } else { // This must be the truncstore/extload case
13276       EVT ExtendedTy =
13277           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
13278       NewLoad =
13279           DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
13280                          FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
13281                          JointMemOpVT, FirstLoadAlign, MMOFlags);
13282       NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
13283                                    FirstInChain->getBasePtr(),
13284                                    FirstInChain->getPointerInfo(), JointMemOpVT,
13285                                    FirstInChain->getAlignment(),
13286                                    FirstInChain->getMemOperand()->getFlags());
13287     }
13288 
13289     // Transfer chain users from old loads to the new load.
13290     for (unsigned i = 0; i < NumElem; ++i) {
13291       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
13292       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
13293                                     SDValue(NewLoad.getNode(), 1));
13294     }
13295 
13296     // Replace the all stores with the new store. Recursively remove
13297     // corresponding value if its no longer used.
13298     for (unsigned i = 0; i < NumElem; ++i) {
13299       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
13300       CombineTo(StoreNodes[i].MemNode, NewStore);
13301       if (Val.getNode()->use_empty())
13302         recursivelyDeleteUnusedNodes(Val.getNode());
13303     }
13304 
13305     RV = true;
13306     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13307     continue;
13308   }
13309   return RV;
13310 }
13311 
13312 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
13313   SDLoc SL(ST);
13314   SDValue ReplStore;
13315 
13316   // Replace the chain to avoid dependency.
13317   if (ST->isTruncatingStore()) {
13318     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
13319                                   ST->getBasePtr(), ST->getMemoryVT(),
13320                                   ST->getMemOperand());
13321   } else {
13322     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
13323                              ST->getMemOperand());
13324   }
13325 
13326   // Create token to keep both nodes around.
13327   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
13328                               MVT::Other, ST->getChain(), ReplStore);
13329 
13330   // Make sure the new and old chains are cleaned up.
13331   AddToWorklist(Token.getNode());
13332 
13333   // Don't add users to work list.
13334   return CombineTo(ST, Token, false);
13335 }
13336 
13337 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
13338   SDValue Value = ST->getValue();
13339   if (Value.getOpcode() == ISD::TargetConstantFP)
13340     return SDValue();
13341 
13342   SDLoc DL(ST);
13343 
13344   SDValue Chain = ST->getChain();
13345   SDValue Ptr = ST->getBasePtr();
13346 
13347   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
13348 
13349   // NOTE: If the original store is volatile, this transform must not increase
13350   // the number of stores.  For example, on x86-32 an f64 can be stored in one
13351   // processor operation but an i64 (which is not legal) requires two.  So the
13352   // transform should not be done in this case.
13353 
13354   SDValue Tmp;
13355   switch (CFP->getSimpleValueType(0).SimpleTy) {
13356   default:
13357     llvm_unreachable("Unknown FP type");
13358   case MVT::f16:    // We don't do this for these yet.
13359   case MVT::f80:
13360   case MVT::f128:
13361   case MVT::ppcf128:
13362     return SDValue();
13363   case MVT::f32:
13364     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
13365         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13366       ;
13367       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
13368                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
13369                             MVT::i32);
13370       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
13371     }
13372 
13373     return SDValue();
13374   case MVT::f64:
13375     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
13376          !ST->isVolatile()) ||
13377         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
13378       ;
13379       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
13380                             getZExtValue(), SDLoc(CFP), MVT::i64);
13381       return DAG.getStore(Chain, DL, Tmp,
13382                           Ptr, ST->getMemOperand());
13383     }
13384 
13385     if (!ST->isVolatile() &&
13386         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13387       // Many FP stores are not made apparent until after legalize, e.g. for
13388       // argument passing.  Since this is so common, custom legalize the
13389       // 64-bit integer store into two 32-bit stores.
13390       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
13391       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
13392       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
13393       if (DAG.getDataLayout().isBigEndian())
13394         std::swap(Lo, Hi);
13395 
13396       unsigned Alignment = ST->getAlignment();
13397       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13398       AAMDNodes AAInfo = ST->getAAInfo();
13399 
13400       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13401                                  ST->getAlignment(), MMOFlags, AAInfo);
13402       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13403                         DAG.getConstant(4, DL, Ptr.getValueType()));
13404       Alignment = MinAlign(Alignment, 4U);
13405       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
13406                                  ST->getPointerInfo().getWithOffset(4),
13407                                  Alignment, MMOFlags, AAInfo);
13408       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13409                          St0, St1);
13410     }
13411 
13412     return SDValue();
13413   }
13414 }
13415 
// Visit a STORE node and try a cascade of simplifications/combines; each
// successful transform returns immediately with the replacement value.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // Requires: store of SVT legal/custom (or pre-legalization and not
    // volatile), the target considers dropping the bitcast beneficial, and
    // the access stays fast at the original alignment.
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                            ST->getPointerInfo(), OrigAlign,
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment()) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // getTruncStore may CSE back to the same node; only combine if a new
        // node was actually created.
        if (NewStore.getNode() != N)
          return CombineTo(ST, NewStore, true);
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handles all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // The chain may have been updated in place; re-read it.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter = DAG.GetDemandedBits(
        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                    ST->getMemoryVT().getScalarSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(
            Value,
            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                 ST->getMemoryVT().getScalarSizeInBits()))) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
        ST->getMemoryVT() == ST1->getMemoryVT()) {
      // If this is a store followed by a store with the same value to the same
      // location, then the store is dead/noop.
      if (ST1->getValue() == Value) {
        // The store is dead, remove it.
        return Chain;
      }

      // If ST's chained predecessor ST1 stores to the same location and no
      // other node is chained to ST1, we can effectively drop ST1. Do not
      // remove stores to undef as they may be used as data sinks.
      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        // ST1 is fully overwritten and can be elided. Combine with its chain
        // value.
        CombineTo(ST1, ST1->getChain());
        return SDValue();
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG
                                           : !LegalTypes) {
    for (;;) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  // Try splitting a bit-merged value into two narrower stores.
  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  return ReduceLoadOpStoreWidth(N);
}
13590 
13591 /// For the instruction sequence of store below, F and I values
13592 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
13594 /// which can remove the bitwise instructions or sink them to colder places.
13595 ///
13596 ///   (store (or (zext (bitcast F to i32) to i64),
13597 ///              (shl (zext I to i64), 32)), addr)  -->
13598 ///   (store F, addr) and (store I, addr+4)
13599 ///
13600 /// Similarly, splitting for other merged store can also be beneficial, like:
13601 /// For pair of {i32, i32}, i64 store --> two i32 stores.
13602 /// For pair of {i32, i16}, i64 store --> two i32 stores.
13603 /// For pair of {i16, i16}, i32 store --> two i16 stores.
13604 /// For pair of {i16, i8},  i32 store --> two i16 stores.
13605 /// For pair of {i8, i8},   i16 store --> two i8 stores.
13606 ///
13607 /// We allow each target to determine specifically which kind of splitting is
13608 /// supported.
13609 ///
13610 /// The store patterns are commonly seen from the simple code snippet below
13611 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
13612 ///   void goo(const std::pair<int, float> &);
13613 ///   hoo() {
13614 ///     ...
13615 ///     goo(std::make_pair(tmp, ftmp));
13616 ///     ...
13617 ///   }
13618 ///
SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
  // Splitting trades one store for two; skip it entirely at -O0.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SDValue Val = ST->getValue();
  SDLoc DL(ST);

  // Match OR operand.
  if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
    return SDValue();

  // Match SHL operand and get Lower and Higher parts of Val.
  SDValue Op1 = Val.getOperand(0);
  SDValue Op2 = Val.getOperand(1);
  SDValue Lo, Hi;
  if (Op1.getOpcode() != ISD::SHL) {
    // The SHL can be on either side of the OR; try the other operand.
    std::swap(Op1, Op2);
    if (Op1.getOpcode() != ISD::SHL)
      return SDValue();
  }
  Lo = Op2;
  Hi = Op1.getOperand(0);
  // The SHL must feed only this OR, or it would have to stay live anyway.
  if (!Op1.hasOneUse())
    return SDValue();

  // Match shift amount to HalfValBitSize.
  unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
  if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
    return SDValue();

  // Lo and Hi must each be a single-use zero-extension from a scalar
  // integer no wider than half the merged value (e.g. <=32 bits for i64).
  if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
      !Lo.getOperand(0).getValueType().isScalarInteger() ||
      Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
      Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
      !Hi.getOperand(0).getValueType().isScalarInteger() ||
      Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
    return SDValue();

  // Use the EVT of low and high parts before bitcast as the input
  // of target query.
  EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
                  ? Lo.getOperand(0).getValueType()
                  : Lo.getValueType();
  EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
                   ? Hi.getOperand(0).getValueType()
                   : Hi.getValueType();
  // Let the target decide whether two stores beat the bit-merge sequence.
  if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
    return SDValue();

  // Start to split store.
  unsigned Alignment = ST->getAlignment();
  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
  AAMDNodes AAInfo = ST->getAAInfo();

  // Change the sizes of Lo and Hi's value types to HalfValBitSize.
  EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
  Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));

  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  // Lower value store.
  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
                             ST->getAlignment(), MMOFlags, AAInfo);
  Ptr =
      DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                  DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
  // Higher value store, chained after the lower store.
  SDValue St1 =
      DAG.getStore(St0, DL, Hi, Ptr,
                   ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
                   Alignment / 2, MMOFlags, AAInfo);
  return St1;
}
13696 
13697 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
13698   SDValue InVec = N->getOperand(0);
13699   SDValue InVal = N->getOperand(1);
13700   SDValue EltNo = N->getOperand(2);
13701   SDLoc DL(N);
13702 
13703   // If the inserted element is an UNDEF, just use the input vector.
13704   if (InVal.isUndef())
13705     return InVec;
13706 
13707   EVT VT = InVec.getValueType();
13708 
13709   // Remove redundant insertions:
13710   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
13711   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13712       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
13713     return InVec;
13714 
13715   // Check that we know which element is being inserted
13716   if (!isa<ConstantSDNode>(EltNo))
13717     return SDValue();
13718   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13719 
13720   // Canonicalize insert_vector_elt dag nodes.
13721   // Example:
13722   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
13723   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
13724   //
13725   // Do this only if the child insert_vector node has one use; also
13726   // do this only if indices are both constants and Idx1 < Idx0.
13727   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
13728       && isa<ConstantSDNode>(InVec.getOperand(2))) {
13729     unsigned OtherElt = InVec.getConstantOperandVal(2);
13730     if (Elt < OtherElt) {
13731       // Swap nodes.
13732       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
13733                                   InVec.getOperand(0), InVal, EltNo);
13734       AddToWorklist(NewOp.getNode());
13735       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
13736                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
13737     }
13738   }
13739 
13740   // If we can't generate a legal BUILD_VECTOR, exit
13741   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
13742     return SDValue();
13743 
13744   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
13745   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
13746   // vector elements.
13747   SmallVector<SDValue, 8> Ops;
13748   // Do not combine these two vectors if the output vector will not replace
13749   // the input vector.
13750   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
13751     Ops.append(InVec.getNode()->op_begin(),
13752                InVec.getNode()->op_end());
13753   } else if (InVec.isUndef()) {
13754     unsigned NElts = VT.getVectorNumElements();
13755     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
13756   } else {
13757     return SDValue();
13758   }
13759 
13760   // Insert the element
13761   if (Elt < Ops.size()) {
13762     // All the operands of BUILD_VECTOR must have the same type;
13763     // we enforce that here.
13764     EVT OpVT = Ops[0].getValueType();
13765     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
13766   }
13767 
13768   // Return the new vector
13769   return DAG.getBuildVector(VT, DL, Ops);
13770 }
13771 
/// Replace an EXTRACT_VECTOR_ELT of a vector load with a scalar load of just
/// the addressed element.
///
/// \p EVE is the EXTRACT_VECTOR_ELT node, \p InVecVT the loaded vector type,
/// \p EltNo the (possibly non-constant) element index, and \p OriginalLoad
/// the vector load being narrowed. The caller guarantees the load is
/// non-volatile and that the extract is effectively its only value use.
/// Returns SDValue(EVE, 0) after rewriting all uses in place, or an empty
/// SDValue when the narrowing is not legal/profitable.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  assert(!OriginalLoad->isVolatile());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  // The narrow load must be at least ABI-aligned for the element type, and
  // scalar loads of that type must be available on the target.
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  // NOTE(review): this ternary looks inverted relative to the extending-load
  // decision below (an *extending* load is emitted exactly when ResultVT is
  // wider than VecEltVT) -- confirm the intended argument to
  // shouldReduceLoadWidth before changing it.
  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
    ISD::NON_EXTLOAD : ISD::EXTLOAD;
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
    return SDValue();

  Align = NewAlign;

  // Compute the address of the extracted element: base + index * elt size.
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: fold the byte offset now and keep precise pointer info.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: emit the scale as a multiply; pointer info stays at
    // the load's base (the exact offset is unknown at compile time).
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
                          Align, OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Same width or narrower: plain load, then truncate or bitcast the
    // loaded value back to the extract's result type.
    Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
                       MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
                       OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  // Rewrite both the extracted value and the old load's output chain in one
  // atomic step so chain users keep a valid dependence.
  WorklistRemover DeadNodes(*this);
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}
13854 
/// Combine an EXTRACT_VECTOR_ELT by folding through its source vector:
/// scalar_to_vector, build_vector, bitcast, insert_vector_elt,
/// vector_shuffle, and finally (vector) loads, which may be narrowed to a
/// scalar load of just the extracted element.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  // Extracting from undef yields undef.
  if (InVec.isUndef())
    return DAG.getUNDEF(NVT);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

  // extract_vector_elt (build_vector x, y), 1 -> y
  if (ConstEltNo &&
      InVec.getOpcode() == ISD::BUILD_VECTOR &&
      TLI.isTypeLegal(VT) &&
      (InVec.hasOneUse() ||
       TLI.aggressivelyPreferBuildVectorSources(VT))) {
    SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (NVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
  // Only the element holding the low bits of the scalar qualifies: element 0
  // on little-endian targets, the last element on big-endian.
  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
      ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
    SDValue BCSrc = InVec.getOperand(0);
    if (BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
  }

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      EltNo == InVec.getOperand(2)) {
    SDValue Elt = InVec.getOperand(1);
    return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
  }

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    // If the chosen input is itself a build_vector, grab the scalar directly.
    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                         DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
    }
  }

  // Everything below attempts to replace the extract with a narrow scalar
  // load of the addressed element.
  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  // Look through a bitcast to the underlying vector, remembering whether the
  // element count changed (which invalidates shuffle-mask reasoning below).
  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  // Variable-index case: only before legalization, and only when the index
  // computation does not depend on the load itself (that would form a cycle).
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
      if (!OrigLoad->isVolatile()) {
        return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                             OrigLoad);
      }
    }
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Walk through s2v / shuffle wrappers to find the underlying load.
    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      // NOTE(review): the guard uses '>' so Elt == NumElems would still index
      // the mask -- looks like a potential off-by-one; confirm upstream.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      // Look through a bitcast around the chosen shuffle operand.
      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Rebase the element index into the selected shuffle operand.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}
14067 
14068 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
14069 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
14070   // We perform this optimization post type-legalization because
14071   // the type-legalizer often scalarizes integer-promoted vectors.
14072   // Performing this optimization before may create bit-casts which
14073   // will be type-legalized to complex code sequences.
14074   // We perform this optimization only before the operation legalizer because we
14075   // may introduce illegal operations.
14076   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
14077     return SDValue();
14078 
14079   unsigned NumInScalars = N->getNumOperands();
14080   SDLoc DL(N);
14081   EVT VT = N->getValueType(0);
14082 
14083   // Check to see if this is a BUILD_VECTOR of a bunch of values
14084   // which come from any_extend or zero_extend nodes. If so, we can create
14085   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
14086   // optimizations. We do not handle sign-extend because we can't fill the sign
14087   // using shuffles.
14088   EVT SourceType = MVT::Other;
14089   bool AllAnyExt = true;
14090 
14091   for (unsigned i = 0; i != NumInScalars; ++i) {
14092     SDValue In = N->getOperand(i);
14093     // Ignore undef inputs.
14094     if (In.isUndef()) continue;
14095 
14096     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
14097     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
14098 
14099     // Abort if the element is not an extension.
14100     if (!ZeroExt && !AnyExt) {
14101       SourceType = MVT::Other;
14102       break;
14103     }
14104 
14105     // The input is a ZeroExt or AnyExt. Check the original type.
14106     EVT InTy = In.getOperand(0).getValueType();
14107 
14108     // Check that all of the widened source types are the same.
14109     if (SourceType == MVT::Other)
14110       // First time.
14111       SourceType = InTy;
14112     else if (InTy != SourceType) {
14113       // Multiple income types. Abort.
14114       SourceType = MVT::Other;
14115       break;
14116     }
14117 
14118     // Check if all of the extends are ANY_EXTENDs.
14119     AllAnyExt &= AnyExt;
14120   }
14121 
14122   // In order to have valid types, all of the inputs must be extended from the
14123   // same source type and all of the inputs must be any or zero extend.
14124   // Scalar sizes must be a power of two.
14125   EVT OutScalarTy = VT.getScalarType();
14126   bool ValidTypes = SourceType != MVT::Other &&
14127                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
14128                  isPowerOf2_32(SourceType.getSizeInBits());
14129 
14130   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
14131   // turn into a single shuffle instruction.
14132   if (!ValidTypes)
14133     return SDValue();
14134 
14135   bool isLE = DAG.getDataLayout().isLittleEndian();
14136   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
14137   assert(ElemRatio > 1 && "Invalid element size ratio");
14138   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
14139                                DAG.getConstant(0, DL, SourceType);
14140 
14141   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
14142   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
14143 
14144   // Populate the new build_vector
14145   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
14146     SDValue Cast = N->getOperand(i);
14147     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
14148             Cast.getOpcode() == ISD::ZERO_EXTEND ||
14149             Cast.isUndef()) && "Invalid cast opcode");
14150     SDValue In;
14151     if (Cast.isUndef())
14152       In = DAG.getUNDEF(SourceType);
14153     else
14154       In = Cast->getOperand(0);
14155     unsigned Index = isLE ? (i * ElemRatio) :
14156                             (i * ElemRatio + (ElemRatio - 1));
14157 
14158     assert(Index < Ops.size() && "Invalid index");
14159     Ops[Index] = In;
14160   }
14161 
14162   // The type of the new BUILD_VECTOR node.
14163   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
14164   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
14165          "Invalid vector size");
14166   // Check if the new vector type is legal.
14167   if (!isTypeLegal(VecVT)) return SDValue();
14168 
14169   // Make the new BUILD_VECTOR.
14170   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
14171 
14172   // The new BUILD_VECTOR node has the potential to be further optimized.
14173   AddToWorklist(BV.getNode());
14174   // Bitcast to the desired type.
14175   return DAG.getBitcast(VT, BV);
14176 }
14177 
14178 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
14179   EVT VT = N->getValueType(0);
14180 
14181   unsigned NumInScalars = N->getNumOperands();
14182   SDLoc DL(N);
14183 
14184   EVT SrcVT = MVT::Other;
14185   unsigned Opcode = ISD::DELETED_NODE;
14186   unsigned NumDefs = 0;
14187 
14188   for (unsigned i = 0; i != NumInScalars; ++i) {
14189     SDValue In = N->getOperand(i);
14190     unsigned Opc = In.getOpcode();
14191 
14192     if (Opc == ISD::UNDEF)
14193       continue;
14194 
14195     // If all scalar values are floats and converted from integers.
14196     if (Opcode == ISD::DELETED_NODE &&
14197         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
14198       Opcode = Opc;
14199     }
14200 
14201     if (Opc != Opcode)
14202       return SDValue();
14203 
14204     EVT InVT = In.getOperand(0).getValueType();
14205 
14206     // If all scalar values are typed differently, bail out. It's chosen to
14207     // simplify BUILD_VECTOR of integer types.
14208     if (SrcVT == MVT::Other)
14209       SrcVT = InVT;
14210     if (SrcVT != InVT)
14211       return SDValue();
14212     NumDefs++;
14213   }
14214 
14215   // If the vector has just one element defined, it's not worth to fold it into
14216   // a vectorized one.
14217   if (NumDefs < 2)
14218     return SDValue();
14219 
14220   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
14221          && "Should only handle conversion from integer to float.");
14222   assert(SrcVT != MVT::Other && "Cannot determine source type!");
14223 
14224   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
14225 
14226   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
14227     return SDValue();
14228 
14229   // Just because the floating-point vector type is legal does not necessarily
14230   // mean that the corresponding integer vector type is.
14231   if (!isTypeLegal(NVT))
14232     return SDValue();
14233 
14234   SmallVector<SDValue, 8> Opnds;
14235   for (unsigned i = 0; i != NumInScalars; ++i) {
14236     SDValue In = N->getOperand(i);
14237 
14238     if (In.isUndef())
14239       Opnds.push_back(DAG.getUNDEF(SrcVT));
14240     else
14241       Opnds.push_back(In.getOperand(0));
14242   }
14243   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
14244   AddToWorklist(BV.getNode());
14245 
14246   return DAG.getNode(Opcode, DL, VT, BV);
14247 }
14248 
/// Build a single VECTOR_SHUFFLE covering the lanes of the BUILD_VECTOR \p N
/// that come from \p VecIn1 and \p VecIn2 (VecIn2 may be a null SDValue).
/// \p VectorMask maps each lane of N to its source-vector id (-1 undef,
/// 0 the zero vector, positive for input vectors); \p LeftIdx is VecIn1's
/// id and LeftIdx + 1 is VecIn2's. Most of the work is reconciling input
/// vector types with the output type via concat/extract/insert of
/// subvectors. Returns an empty SDValue when the type mismatch cannot be
/// handled.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  // With no second input, treat it as having the first input's type.
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned Vec2Offset = 0;
  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // In case both the input vectors are extracted from same base
  // vector we do not need extra addend (Vec2Offset) while
  // computing shuffle mask.
  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
    Vec2Offset = InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      // Both inputs are now folded into the single concat.
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      // First input is exactly twice as wide as the output.
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        // Shuffle at the (wider) input width; the result is extracted below.
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // Second input is half the output width: widen it with undef padding.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // Lanes that are undef (-1) or come from the zero vector (0) stay undef
    // here; the zero blend is handled by the caller.
    if (VectorMask[i] <= 0)
      continue;

    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  // If we shuffled at a wider width, pull the output-sized low subvector out.
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
14369 
14370 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
14371 // operations. If the types of the vectors we're extracting from allow it,
14372 // turn this into a vector_shuffle node.
14373 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
14374   SDLoc DL(N);
14375   EVT VT = N->getValueType(0);
14376 
14377   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
14378   if (!isTypeLegal(VT))
14379     return SDValue();
14380 
14381   // May only combine to shuffle after legalize if shuffle is legal.
14382   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
14383     return SDValue();
14384 
14385   bool UsesZeroVector = false;
14386   unsigned NumElems = N->getNumOperands();
14387 
14388   // Record, for each element of the newly built vector, which input vector
14389   // that element comes from. -1 stands for undef, 0 for the zero vector,
14390   // and positive values for the input vectors.
14391   // VectorMask maps each element to its vector number, and VecIn maps vector
14392   // numbers to their initial SDValues.
14393 
14394   SmallVector<int, 8> VectorMask(NumElems, -1);
14395   SmallVector<SDValue, 8> VecIn;
14396   VecIn.push_back(SDValue());
14397 
14398   for (unsigned i = 0; i != NumElems; ++i) {
14399     SDValue Op = N->getOperand(i);
14400 
14401     if (Op.isUndef())
14402       continue;
14403 
14404     // See if we can use a blend with a zero vector.
14405     // TODO: Should we generalize this to a blend with an arbitrary constant
14406     // vector?
14407     if (isNullConstant(Op) || isNullFPConstant(Op)) {
14408       UsesZeroVector = true;
14409       VectorMask[i] = 0;
14410       continue;
14411     }
14412 
14413     // Not an undef or zero. If the input is something other than an
14414     // EXTRACT_VECTOR_ELT with a constant index, bail out.
14415     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14416         !isa<ConstantSDNode>(Op.getOperand(1)))
14417       return SDValue();
14418     SDValue ExtractedFromVec = Op.getOperand(0);
14419 
14420     // All inputs must have the same element type as the output.
14421     if (VT.getVectorElementType() !=
14422         ExtractedFromVec.getValueType().getVectorElementType())
14423       return SDValue();
14424 
14425     // Have we seen this input vector before?
14426     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
14427     // a map back from SDValues to numbers isn't worth it.
14428     unsigned Idx = std::distance(
14429         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
14430     if (Idx == VecIn.size())
14431       VecIn.push_back(ExtractedFromVec);
14432 
14433     VectorMask[i] = Idx;
14434   }
14435 
14436   // If we didn't find at least one input vector, bail out.
14437   if (VecIn.size() < 2)
14438     return SDValue();
14439 
14440   // If all the Operands of BUILD_VECTOR extract from same
14441   // vector, then split the vector efficiently based on the maximum
14442   // vector access index and adjust the VectorMask and
14443   // VecIn accordingly.
14444   if (VecIn.size() == 2) {
14445     unsigned MaxIndex = 0;
14446     unsigned NearestPow2 = 0;
14447     SDValue Vec = VecIn.back();
14448     EVT InVT = Vec.getValueType();
14449     MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
14450     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
14451 
14452     for (unsigned i = 0; i < NumElems; i++) {
14453       if (VectorMask[i] <= 0)
14454         continue;
14455       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
14456       IndexVec[i] = Index;
14457       MaxIndex = std::max(MaxIndex, Index);
14458     }
14459 
14460     NearestPow2 = PowerOf2Ceil(MaxIndex);
14461     if (InVT.isSimple() && (NearestPow2 > 2) &&
14462         ((NumElems * 2) < NearestPow2)) {
14463       unsigned SplitSize = NearestPow2 / 2;
14464       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
14465                                      InVT.getVectorElementType(), SplitSize);
14466       if (TLI.isTypeLegal(SplitVT)) {
14467         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
14468                                      DAG.getConstant(SplitSize, DL, IdxTy));
14469         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
14470                                      DAG.getConstant(0, DL, IdxTy));
14471         VecIn.pop_back();
14472         VecIn.push_back(VecIn1);
14473         VecIn.push_back(VecIn2);
14474 
14475         for (unsigned i = 0; i < NumElems; i++) {
14476           if (VectorMask[i] <= 0)
14477             continue;
14478           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
14479         }
14480       }
14481     }
14482   }
14483 
14484   // TODO: We want to sort the vectors by descending length, so that adjacent
14485   // pairs have similar length, and the longer vector is always first in the
14486   // pair.
14487 
14488   // TODO: Should this fire if some of the input vectors has illegal type (like
14489   // it does now), or should we let legalization run its course first?
14490 
14491   // Shuffle phase:
14492   // Take pairs of vectors, and shuffle them so that the result has elements
14493   // from these vectors in the correct places.
14494   // For example, given:
14495   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
14496   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
14497   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
14498   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
14499   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
14500   // We will generate:
14501   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
14502   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
14503   SmallVector<SDValue, 4> Shuffles;
14504   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
14505     unsigned LeftIdx = 2 * In + 1;
14506     SDValue VecLeft = VecIn[LeftIdx];
14507     SDValue VecRight =
14508         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
14509 
14510     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
14511                                                 VecRight, LeftIdx))
14512       Shuffles.push_back(Shuffle);
14513     else
14514       return SDValue();
14515   }
14516 
14517   // If we need the zero vector as an "ingredient" in the blend tree, add it
14518   // to the list of shuffles.
14519   if (UsesZeroVector)
14520     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
14521                                       : DAG.getConstantFP(0.0, DL, VT));
14522 
14523   // If we only have one shuffle, we're done.
14524   if (Shuffles.size() == 1)
14525     return Shuffles[0];
14526 
14527   // Update the vector mask to point to the post-shuffle vectors.
14528   for (int &Vec : VectorMask)
14529     if (Vec == 0)
14530       Vec = Shuffles.size() - 1;
14531     else
14532       Vec = (Vec - 1) / 2;
14533 
14534   // More than one shuffle. Generate a binary tree of blends, e.g. if from
14535   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
14536   // generate:
14537   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
14538   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
14539   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
14540   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
14541   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
14542   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
14543   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
14544 
14545   // Make sure the initial size of the shuffle list is even.
14546   if (Shuffles.size() % 2)
14547     Shuffles.push_back(DAG.getUNDEF(VT));
14548 
14549   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
14550     if (CurSize % 2) {
14551       Shuffles[CurSize] = DAG.getUNDEF(VT);
14552       CurSize++;
14553     }
14554     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
14555       int Left = 2 * In;
14556       int Right = 2 * In + 1;
14557       SmallVector<int, 8> Mask(NumElems, -1);
14558       for (unsigned i = 0; i != NumElems; ++i) {
14559         if (VectorMask[i] == Left) {
14560           Mask[i] = i;
14561           VectorMask[i] = In;
14562         } else if (VectorMask[i] == Right) {
14563           Mask[i] = i + NumElems;
14564           VectorMask[i] = In;
14565         }
14566       }
14567 
14568       Shuffles[In] =
14569           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
14570     }
14571   }
14572   return Shuffles[0];
14573 }
14574 
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations which can be matched to a truncate or to a shuffle-truncate.
// E.g. with Stride == 2:
//   (build_vector (extract_elt V, 0), (extract_elt V, 2), ...)
// selects the even elements of V, which is exactly what truncating V viewed
// as a vector of double-width integer elements produces on little-endian.
SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) {
  // TODO: Add support for big-endian.
  if (DAG.getDataLayout().isBigEndian())
    return SDValue();
  // Need at least two operands to derive a stride below.
  if (N->getNumOperands() < 2)
    return SDValue();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  unsigned NumElems = N->getNumOperands();

  if (!isTypeLegal(VT))
    return SDValue();

  // If the input is something other than an EXTRACT_VECTOR_ELT with a constant
  // index, bail out.
  // TODO: Allow undef elements in some cases?
  if (any_of(N->ops(), [VT](SDValue Op) {
        return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
               !isa<ConstantSDNode>(Op.getOperand(1)) ||
               Op.getValueType() != VT.getVectorElementType();
      }))
    return SDValue();

  // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index
  auto GetExtractIdx = [](SDValue Extract) {
    return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue();
  };

  // The offset is defined to be the BUILD_VECTOR's first operand (assuming no
  // undef and little-endian).
  int Offset = GetExtractIdx(N->getOperand(0));

  // Compute the stride from the next operand.
  int Stride = GetExtractIdx(N->getOperand(1)) - Offset;
  SDValue ExtractedFromVec = N->getOperand(0).getOperand(0);

  // Proceed only if the stride and the types can be matched to a truncate:
  // - the stride must be a power of two greater than 1 (a truncate keeps one
  //   of every Stride elements);
  // - the source vector must contain exactly Stride * NumElems elements so
  //   the truncate consumes it entirely;
  // - the widened element (scalar size * Stride) must fit in 64 bits.
  if ((Stride == 1 || !isPowerOf2_32(Stride)) ||
      (ExtractedFromVec.getValueType().getVectorNumElements() !=
       Stride * NumElems) ||
      (VT.getScalarSizeInBits() * Stride > 64))
    return SDValue();

  // Check remaining operands are consistent with the computed stride: every
  // operand must extract from the same source at Offset + i * Stride.
  for (unsigned i = 1; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if ((Op.getOperand(0) != ExtractedFromVec) ||
        (GetExtractIdx(Op) != Stride * i + Offset))
      return SDValue();
  }

  SDValue Res = ExtractedFromVec;
  // TRUNCATE is an integer operation; for FP results, truncate in the
  // equivalent integer type and bitcast back at the end.
  EVT TruncVT =
      VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
  if (Offset) {
    // If the first index is non-zero, need to shuffle elements of interest to
    // lower parts of the vector's elements the truncate will act upon.
    // TODO: Generalize to compute the permute-shuffle that will prepare any
    // element permutation for the truncate, and let the target decide if
    // profitable.
    EVT ExtractedVT = ExtractedFromVec.getValueType();
    SmallVector<int, 64> Mask;
    for (unsigned i = 0; i != NumElems; ++i) {
      Mask.push_back(Offset + i * Stride);
      // Pad the elements that will be lost after the truncate with undefs.
      Mask.append(Stride - 1, -1);
    }
    // Only proceed if the target accepts both the shuffle mask and the
    // overall shuffle+truncate combination.
    if (!TLI.isShuffleMaskLegal(Mask, ExtractedVT) ||
        !TLI.isDesirableToCombineBuildVectorToShuffleTruncate(Mask, ExtractedVT,
                                                              TruncVT))
      return SDValue();
    Res = DAG.getVectorShuffle(ExtractedVT, SDLoc(N), Res,
                               DAG.getUNDEF(ExtractedVT), Mask);
  }
  // Construct the truncate: reinterpret the source as NumElems elements of
  // Stride-times-wider integers, then truncate each down to the result
  // element width (keeping the low, i.e. little-endian-first, part).
  LLVMContext &Ctx = *DAG.getContext();
  EVT NewVT = VT.getVectorVT(
      Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems);

  Res = DAG.getBitcast(NewVT, Res);
  Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res);
  return DAG.getBitcast(VT, Res);
}
14661 
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    // Returns the constant extract index if Op is an EXTRACT_VECTOR_ELT from
    // the same source vector as operand 0; otherwise returns the all-ones
    // sentinel (uint64_t)-1.
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    // First element's index, or -1 if it didn't match.
    // NOTE(review): narrowing the uint64_t sentinel into int relies on
    // implementation-defined conversion yielding -1 — true on all supported
    // hosts, but worth confirming.
    int Offset = checkElem(Op0);
    // All operands must extract consecutive elements starting at Offset;
    // any mismatch resets Offset to the -1 "no match" state.
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    // Extracting all elements of V starting at 0, with V of the result type,
    // is just V itself.
    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    // Otherwise form an EXTRACT_SUBVECTOR, reusing the first extract's index.
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
    return V;

  // Match to a truncate / shuffle+truncate, if the target opts in.
  if (TLI.isDesirableToCombineBuildVectorToTruncate())
    if (SDValue V = reduceBuildVecToTrunc(N))
      return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}
14713 
14714 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
14715   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14716   EVT OpVT = N->getOperand(0).getValueType();
14717 
14718   // If the operands are legal vectors, leave them alone.
14719   if (TLI.isTypeLegal(OpVT))
14720     return SDValue();
14721 
14722   SDLoc DL(N);
14723   EVT VT = N->getValueType(0);
14724   SmallVector<SDValue, 8> Ops;
14725 
14726   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
14727   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14728 
14729   // Keep track of what we encounter.
14730   bool AnyInteger = false;
14731   bool AnyFP = false;
14732   for (const SDValue &Op : N->ops()) {
14733     if (ISD::BITCAST == Op.getOpcode() &&
14734         !Op.getOperand(0).getValueType().isVector())
14735       Ops.push_back(Op.getOperand(0));
14736     else if (ISD::UNDEF == Op.getOpcode())
14737       Ops.push_back(ScalarUndef);
14738     else
14739       return SDValue();
14740 
14741     // Note whether we encounter an integer or floating point scalar.
14742     // If it's neither, bail out, it could be something weird like x86mmx.
14743     EVT LastOpVT = Ops.back().getValueType();
14744     if (LastOpVT.isFloatingPoint())
14745       AnyFP = true;
14746     else if (LastOpVT.isInteger())
14747       AnyInteger = true;
14748     else
14749       return SDValue();
14750   }
14751 
14752   // If any of the operands is a floating point scalar bitcast to a vector,
14753   // use floating point types throughout, and bitcast everything.
14754   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
14755   if (AnyFP) {
14756     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
14757     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14758     if (AnyInteger) {
14759       for (SDValue &Op : Ops) {
14760         if (Op.getValueType() == SVT)
14761           continue;
14762         if (Op.isUndef())
14763           Op = ScalarUndef;
14764         else
14765           Op = DAG.getBitcast(SVT, Op);
14766       }
14767     }
14768   }
14769 
14770   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
14771                                VT.getSizeInBits() / SVT.getSizeInBits());
14772   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
14773 }
14774 
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 hold the (at most two) shuffle sources; Mask is accumulated one
  // operand at a time, NumOpElts entries per operand.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    // Peek through any bitcast.
    Op = peekThroughBitcast(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();

    // Peek through any bitcast.
    ExtVec = peekThroughBitcast(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    // The extraction index must be constant to map it onto the mask.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast: wider elements
    // in the pre-bitcast type mean a smaller index, and vice versa. Bail out
    // if the element counts don't divide evenly either way.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      // Indices >= NumElts select from the second shuffle operand.
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Only produce the shuffle if the target says this mask is legal.
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}
14855 
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0)->getValueType(0).isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      // Only integer and FP scalars can be placed into a vector element.
      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the result would not hold at least two scalar elements;
      // it wouldn't really be a concatenation then.
      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0)->getValueType(0);
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // An undef operand contributes NumElts undef scalars.
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          // Integer scalars may be wider than MinVT; truncate to the common
          // smallest type chosen above.
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    // Operand i must read the i-th part of the source, i.e. start at element
    // i * PartNumElem, for the whole concat to be a no-op.
    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  // All non-undef operands were identity extracts of SingleSource (if any
  // non-undef operand existed at all), so the concat is just the source.
  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
15009 
/// If we are extracting a subvector produced by a wide binary operator with
/// at least one operand that was the result of a vector concatenation, then
/// try to use the narrow vector operands directly to avoid the concatenation
/// and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
  // some of these bailouts with other transforms.

  // The extract index must be a constant, so we can map it to a concat operand.
  auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!ExtractIndex)
    return SDValue();

  // Only handle the case where we are doubling and then halving. A larger ratio
  // may require more than two narrow binops to replace the wide binop.
  EVT VT = Extract->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
         "Extract index is not a multiple of the vector length.");
  if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
    return SDValue();

  // We are looking for an optionally bitcasted wide vector binary operator
  // feeding an extract subvector.
  SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));

  // TODO: The motivating case for this transform is an x86 AVX1 target. That
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
  // flavors, but no other 256-bit integer support. This could be extended to
  // handle any binop, but that may require fixing/adding other folds to avoid
  // codegen regressions.
  unsigned BOpcode = BinOp.getOpcode();
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
    return SDValue();

  // The binop must be a vector type, so we can chop it in half.
  EVT WideBVT = BinOp.getValueType();
  if (!WideBVT.isVector())
    return SDValue();

  // Bail out if the target does not support a narrower version of the binop.
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideBVT.getVectorNumElements() / 2);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
    return SDValue();

  // Peek through bitcasts of the binary operator operands if needed.
  SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
  SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));

  // We need at least one concatenation operation of a binop operand to make
  // this transform worthwhile. The concat must double the input vector sizes.
  // TODO: Should we also handle INSERT_SUBVECTOR patterns?
  bool ConcatL =
      LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
  bool ConcatR =
      RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
  if (!ConcatL && !ConcatR)
    return SDValue();

  // If one of the binop operands was not the result of a concat, we must
  // extract a half-sized operand for our new narrow binop. We can't just reuse
  // the original extract index operand because we may have bitcasted.
  unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
  EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
  SDLoc DL(Extract);

  // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
  // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
  // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
  SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                    BinOp.getOperand(0),
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));

  SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                    BinOp.getOperand(1),
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));

  // Bitcast back to the element type the caller expects from the extract.
  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
  return DAG.getBitcast(VT, NarrowBinOp);
}
15095 
15096 /// If we are extracting a subvector from a wide vector load, convert to a
15097 /// narrow load to eliminate the extraction:
15098 /// (extract_subvector (load wide vector)) --> (load narrow vector)
15099 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
15100   // TODO: Add support for big-endian. The offset calculation must be adjusted.
15101   if (DAG.getDataLayout().isBigEndian())
15102     return SDValue();
15103 
15104   // TODO: The one-use check is overly conservative. Check the cost of the
15105   // extract instead or remove that condition entirely.
15106   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
15107   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
15108   if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
15109       !ExtIdx)
15110     return SDValue();
15111 
15112   // The narrow load will be offset from the base address of the old load if
15113   // we are extracting from something besides index 0 (little-endian).
15114   EVT VT = Extract->getValueType(0);
15115   SDLoc DL(Extract);
15116   SDValue BaseAddr = Ld->getOperand(1);
15117   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
15118 
15119   // TODO: Use "BaseIndexOffset" to make this more effective.
15120   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
15121   MachineFunction &MF = DAG.getMachineFunction();
15122   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
15123                                                    VT.getStoreSize());
15124   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
15125   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
15126   return NewLd;
15127 }
15128 
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);

  // Extract from UNDEF is UNDEF.
  if (V.isUndef())
    return DAG.getUNDEF(NVT);

  // Try to turn an extract of a wide load into a narrower load, when the
  // target can load the narrow type.
  if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
    if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
      return NarrowLoad;

  // Combine:
  //    (extract_subvec (concat V1, V2, ...), i)
  // Into:
  //    Vi if possible
  // Only operand 0 is checked as 'concat' assumes all inputs of the same
  // type.
  if (V->getOpcode() == ISD::CONCAT_VECTORS &&
      isa<ConstantSDNode>(N->getOperand(1)) &&
      V->getOperand(0).getValueType() == NVT) {
    unsigned Idx = N->getConstantOperandVal(1);
    unsigned NumElems = NVT.getVectorNumElements();
    assert((Idx % NumElems) == 0 &&
           "IDX in concat is not a multiple of the result vector length.");
    return V->getOperand(Idx / NumElems);
  }

  // Skip bitcasting
  V = peekThroughBitcast(V);

  if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only simple case where vector being inserted and vector
    // being extracted are of same size.
    EVT SmallVT = V->getOperand(1).getValueType();
    if (!NVT.bitsEq(SmallVT))
      return SDValue();

    // Only handle cases where both indexes are constants.
    ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
    ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));

    if (InsIdx && ExtIdx) {
      // Combine:
      //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
      // Into:
      //    indices are equal or bit offsets are equal => V1
      //    otherwise => (extract_subvec V1, ExtIdx)
      // Comparing bit offsets (index * scalar size) rather than raw indices
      // handles the case where the insert and extract element types differ
      // because of the bitcast peeked through above.
      if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
          ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
        return DAG.getBitcast(NVT, V->getOperand(1));
      // The extract misses the inserted subvector entirely; extract from the
      // base vector instead (bitcast back to the pre-peek type first).
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
          DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
          N->getOperand(1));
    }
  }

  // extract of a binop fed by concats --> binop on the narrow operands.
  if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
    return NarrowBOp;

  return SDValue();
}
15192 
15193 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
15194                                                  SDValue V, SelectionDAG &DAG) {
15195   SDLoc DL(V);
15196   EVT VT = V.getValueType();
15197 
15198   switch (V.getOpcode()) {
15199   default:
15200     return V;
15201 
15202   case ISD::CONCAT_VECTORS: {
15203     EVT OpVT = V->getOperand(0).getValueType();
15204     int OpSize = OpVT.getVectorNumElements();
15205     SmallBitVector OpUsedElements(OpSize, false);
15206     bool FoundSimplification = false;
15207     SmallVector<SDValue, 4> NewOps;
15208     NewOps.reserve(V->getNumOperands());
15209     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
15210       SDValue Op = V->getOperand(i);
15211       bool OpUsed = false;
15212       for (int j = 0; j < OpSize; ++j)
15213         if (UsedElements[i * OpSize + j]) {
15214           OpUsedElements[j] = true;
15215           OpUsed = true;
15216         }
15217       NewOps.push_back(
15218           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
15219                  : DAG.getUNDEF(OpVT));
15220       FoundSimplification |= Op == NewOps.back();
15221       OpUsedElements.reset();
15222     }
15223     if (FoundSimplification)
15224       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
15225     return V;
15226   }
15227 
15228   case ISD::INSERT_SUBVECTOR: {
15229     SDValue BaseV = V->getOperand(0);
15230     SDValue SubV = V->getOperand(1);
15231     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
15232     if (!IdxN)
15233       return V;
15234 
15235     int SubSize = SubV.getValueType().getVectorNumElements();
15236     int Idx = IdxN->getZExtValue();
15237     bool SubVectorUsed = false;
15238     SmallBitVector SubUsedElements(SubSize, false);
15239     for (int i = 0; i < SubSize; ++i)
15240       if (UsedElements[i + Idx]) {
15241         SubVectorUsed = true;
15242         SubUsedElements[i] = true;
15243         UsedElements[i + Idx] = false;
15244       }
15245 
15246     // Now recurse on both the base and sub vectors.
15247     SDValue SimplifiedSubV =
15248         SubVectorUsed
15249             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
15250             : DAG.getUNDEF(SubV.getValueType());
15251     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
15252     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
15253       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
15254                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
15255     return V;
15256   }
15257   }
15258 }
15259 
15260 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
15261                                        SDValue N1, SelectionDAG &DAG) {
15262   EVT VT = SVN->getValueType(0);
15263   int NumElts = VT.getVectorNumElements();
15264   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
15265   for (int M : SVN->getMask())
15266     if (M >= 0 && M < NumElts)
15267       N0UsedElements[M] = true;
15268     else if (M >= NumElts)
15269       N1UsedElements[M - NumElts] = true;
15270 
15271   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
15272   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
15273   if (S0 == N0 && S1 == N1)
15274     return SDValue();
15275 
15276   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
15277 }
15278 
15279 static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
15280                                    SDValue N1, SelectionDAG &DAG) {
15281   auto isUndefElt = [](SDValue V, int Idx) {
15282     // TODO - handle more cases as required.
15283     if (V.getOpcode() == ISD::BUILD_VECTOR)
15284       return V.getOperand(Idx).isUndef();
15285     return false;
15286   };
15287 
15288   EVT VT = SVN->getValueType(0);
15289   unsigned NumElts = VT.getVectorNumElements();
15290 
15291   bool Changed = false;
15292   SmallVector<int, 8> NewMask;
15293   for (unsigned i = 0; i != NumElts; ++i) {
15294     int Idx = SVN->getMaskElt(i);
15295     if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) ||
15296         ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) {
15297       Changed = true;
15298       Idx = -1;
15299     }
15300     NewMask.push_back(Idx);
15301   }
15302   if (Changed)
15303     return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask);
15304 
15305   return SDValue();
15306 }
15307 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
//
// The caller in this file only invokes this when N0 is a CONCAT_VECTORS and
// N1 is either undef or a CONCAT_VECTORS with matching operand type.
// Returns an empty SDValue if the mask cannot be partitioned into exact
// whole-subvector copies.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
                  SVN->getMask().end(), [](int i) { return i == -1; })) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                              makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      // Fully-defined chunk: it must start on a subvector boundary...
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      // ...and be a run of consecutive source indices, i.e. an exact copy of
      // one source subvector.
      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      // Select the copied subvector from N0's (or, past its end, N1's)
      // concat operands.
      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));

    } else if (AllUndef) {
      // A fully-undef chunk becomes an UNDEF subvector operand.
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
15371 
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load.  A BUILD_VECTOR where each
// element is identical is a splat.  A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  // Only fold when the shuffle is the sole user of both inputs, since the
  // fold effectively destroys them.
  if (!N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();
  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
    bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // Gather the scalar operand selected by each mask element.
  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      // Map the mask element onto a (source, index) pair.
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        // SCALAR_TO_VECTOR only defines element 0; all others stay undef.
        if (Idx == 0)
          Op = S.getOperand(0);
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
    // fine, but it's likely to generate low-quality code if the target can't
    // reconstruct an appropriate shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }
  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      // Prefer zero-extension when the target says it is free.
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
15449 
15450 // Match shuffles that can be converted to any_vector_extend_in_reg.
15451 // This is often generated during legalization.
15452 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
15453 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
15454 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
15455                                             SelectionDAG &DAG,
15456                                             const TargetLowering &TLI,
15457                                             bool LegalOperations) {
15458   EVT VT = SVN->getValueType(0);
15459   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15460 
15461   // TODO Add support for big-endian when we have a test case.
15462   if (!VT.isInteger() || IsBigEndian)
15463     return SDValue();
15464 
15465   unsigned NumElts = VT.getVectorNumElements();
15466   unsigned EltSizeInBits = VT.getScalarSizeInBits();
15467   ArrayRef<int> Mask = SVN->getMask();
15468   SDValue N0 = SVN->getOperand(0);
15469 
15470   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
15471   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
15472     for (unsigned i = 0; i != NumElts; ++i) {
15473       if (Mask[i] < 0)
15474         continue;
15475       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
15476         continue;
15477       return false;
15478     }
15479     return true;
15480   };
15481 
15482   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
15483   // power-of-2 extensions as they are the most likely.
15484   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
15485     if (!isAnyExtend(Scale))
15486       continue;
15487 
15488     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
15489     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
15490     if (!LegalOperations ||
15491         TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
15492       return DAG.getBitcast(VT,
15493                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
15494   }
15495 
15496   return SDValue();
15497 }
15498 
// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
// each source element of a large type into the lowest elements of a smaller
// destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// then be able to remove it.
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  // Look through any bitcast wrapping the shuffle's first input.
  SDValue N0 = peekThroughBitcast(SVN->getOperand(0));

  // The fold only applies when the input is itself a *_extend_vector_inreg.
  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  ArrayRef<int> Mask = SVN->getMask();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();

  // The extension must widen by a whole multiple for the scales to line up.
  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
    return SDValue();
  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;

  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
        continue;
      return false;
    }
    return true;
  };

  // At the moment we just handle the case where we've truncated back to the
  // same size as before the extension.
  // TODO: handle more extension/truncation cases as cases arise.
  if (EltSizeInBits != ExtSrcSizeInBits)
    return SDValue();

  // We can remove *extend_vector_inreg only if the truncation happens at
  // the same scale as the extension.
  if (isTruncate(ExtScale))
    return DAG.getBitcast(VT, N00);

  return SDValue();
}
15559 
15560 // Combine shuffles of splat-shuffles of the form:
15561 // shuffle (shuffle V, undef, splat-mask), undef, M
15562 // If splat-mask contains undef elements, we need to be careful about
15563 // introducing undef's in the folded mask which are not the result of composing
15564 // the masks of the shuffles.
15565 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
15566                                      ShuffleVectorSDNode *Splat,
15567                                      SelectionDAG &DAG) {
15568   ArrayRef<int> SplatMask = Splat->getMask();
15569   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
15570 
15571   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
15572   // every undef mask element in the splat-shuffle has a corresponding undef
15573   // element in the user-shuffle's mask or if the composition of mask elements
15574   // would result in undef.
15575   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
15576   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
15577   //   In this case it is not legal to simplify to the splat-shuffle because we
15578   //   may be exposing the users of the shuffle an undef element at index 1
15579   //   which was not there before the combine.
15580   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
15581   //   In this case the composition of masks yields SplatMask, so it's ok to
15582   //   simplify to the splat-shuffle.
15583   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
15584   //   In this case the composed mask includes all undef elements of SplatMask
15585   //   and in addition sets element zero to undef. It is safe to simplify to
15586   //   the splat-shuffle.
15587   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
15588                                        ArrayRef<int> SplatMask) {
15589     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
15590       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
15591           SplatMask[UserMask[i]] != -1)
15592         return false;
15593     return true;
15594   };
15595   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
15596     return SDValue(Splat, 0);
15597 
15598   // Create a new shuffle with a mask that is composed of the two shuffles'
15599   // masks.
15600   SmallVector<int, 32> NewMask;
15601   for (int Idx : UserMask)
15602     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
15603 
15604   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
15605                               Splat->getOperand(0), Splat->getOperand(1),
15606                               NewMask);
15607 }
15608 
/// Combine a VECTOR_SHUFFLE node. Applies, in order: mask/operand
/// canonicalizations, splat folds, operand simplification, extend/truncate
/// matching, concat partitioning, scalar-source folding, bitcasted-shuffle
/// merging, and shuffle-of-shuffle folding. Returns the replacement value,
/// or an empty SDValue if no combine applies.
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    // Remap all N1 references (indices >= NumElts) onto N0.
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  // Simplify shuffle mask if a referenced element is UNDEF.
  if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG))
    return V;

  // A shuffle of a single vector that is a splat can always be folded.
  if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
    if (N1->isUndef() && N0Shuf->isSplat())
      return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      // Find the first non-undef operand to compare the rest against.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // There are various patterns used to build up a vector from smaller vectors,
  // subvectors, or elements. Scan chains of these and replace unused insertions
  // or components with undef.
  if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
    return S;

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    // Peek through the bitcast only if there is one user.
    SDValue BC0 = N0;
    while (BC0.getOpcode() == ISD::BITCAST) {
      if (!BC0.hasOneUse())
        break;
      BC0 = BC0.getOperand(0);
    }

    // Expand a mask by Scale: each index becomes Scale consecutive indices
    // into the narrower element type (undef stays undef).
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      // Both scalar sizes must be whole multiples of the common scalar size
      // for the masks to be expressible at that granularity.
      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {

        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
  }

  return SDValue();
}
15952 
/// Combine a SCALAR_TO_VECTOR node. Currently only folds the
/// SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern into a shuffle of V
/// (with an optional truncate or subvector extract to fix up types).
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    // Only handle a constant extraction index.
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      // Build a mask that moves the extracted element to lane 0 and leaves
      // every other lane undef.
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;
      SDValue Val;
      // If we have an implicit truncate (scalar integer narrower than the
      // extracted element), do the truncate here as long as the target type
      // is legal; the shuffle path below requires matching scalar types.
      if (VT.getScalarType() != InVal.getValueType() &&
          InVal.getValueType().isScalarInteger() &&
          isTypeLegal(VT.getScalarType())) {
        Val =
            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
      }
      if (VT.getScalarType() == InVecT.getScalarType() &&
          VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
          TLI.isShuffleMaskLegal(NewMask, VT)) {
        Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
                                   DAG.getUNDEF(InVecT), NewMask);
        // If the initial vector is the correct size this shuffle is a
        // valid result.
        if (VT == InVecT)
          return Val;
        // If not we must truncate the vector.
        if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
          MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
          SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
          EVT SubVT =
              EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
                               VT.getVectorNumElements());
          // Extract the low VT-sized subvector of the shuffled result.
          Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
                            ZeroIdx);
          return Val;
        }
      }
    }
  }

  return SDValue();
}
16003 
16004 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
16005   EVT VT = N->getValueType(0);
16006   SDValue N0 = N->getOperand(0);
16007   SDValue N1 = N->getOperand(1);
16008   SDValue N2 = N->getOperand(2);
16009 
16010   // If inserting an UNDEF, just return the original vector.
16011   if (N1.isUndef())
16012     return N0;
16013 
16014   // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
16015   // us to pull BITCASTs from input to output.
16016   if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
16017     if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
16018       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
16019 
16020   // If this is an insert of an extracted vector into an undef vector, we can
16021   // just use the input to the extract.
16022   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16023       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
16024     return N1.getOperand(0);
16025 
16026   // If we are inserting a bitcast value into an undef, with the same
16027   // number of elements, just use the bitcast input of the extract.
16028   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
16029   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
16030   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
16031       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16032       N1.getOperand(0).getOperand(1) == N2 &&
16033       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
16034           VT.getVectorNumElements()) {
16035     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
16036   }
16037 
16038   // If both N1 and N2 are bitcast values on which insert_subvector
16039   // would makes sense, pull the bitcast through.
16040   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
16041   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
16042   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
16043     SDValue CN0 = N0.getOperand(0);
16044     SDValue CN1 = N1.getOperand(0);
16045     if (CN0.getValueType().getVectorElementType() ==
16046             CN1.getValueType().getVectorElementType() &&
16047         CN0.getValueType().getVectorNumElements() ==
16048             VT.getVectorNumElements()) {
16049       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
16050                                       CN0.getValueType(), CN0, CN1, N2);
16051       return DAG.getBitcast(VT, NewINSERT);
16052     }
16053   }
16054 
16055   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
16056   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
16057   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
16058   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
16059       N0.getOperand(1).getValueType() == N1.getValueType() &&
16060       N0.getOperand(2) == N2)
16061     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
16062                        N1, N2);
16063 
16064   if (!isa<ConstantSDNode>(N2))
16065     return SDValue();
16066 
16067   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
16068 
16069   // Canonicalize insert_subvector dag nodes.
16070   // Example:
16071   // (insert_subvector (insert_subvector A, Idx0), Idx1)
16072   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
16073   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
16074       N1.getValueType() == N0.getOperand(1).getValueType() &&
16075       isa<ConstantSDNode>(N0.getOperand(2))) {
16076     unsigned OtherIdx = N0.getConstantOperandVal(2);
16077     if (InsIdx < OtherIdx) {
16078       // Swap nodes.
16079       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
16080                                   N0.getOperand(0), N1, N2);
16081       AddToWorklist(NewOp.getNode());
16082       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
16083                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
16084     }
16085   }
16086 
16087   // If the input vector is a concatenation, and the insert replaces
16088   // one of the pieces, we can optimize into a single concat_vectors.
16089   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
16090       N0.getOperand(0).getValueType() == N1.getValueType()) {
16091     unsigned Factor = N1.getValueType().getVectorNumElements();
16092 
16093     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
16094     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
16095 
16096     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
16097   }
16098 
16099   return SDValue();
16100 }
16101 
16102 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
16103   SDValue N0 = N->getOperand(0);
16104 
16105   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
16106   if (N0->getOpcode() == ISD::FP16_TO_FP)
16107     return N0->getOperand(0);
16108 
16109   return SDValue();
16110 }
16111 
16112 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
16113   SDValue N0 = N->getOperand(0);
16114 
16115   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
16116   if (N0->getOpcode() == ISD::AND) {
16117     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
16118     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
16119       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
16120                          N0.getOperand(0));
16121     }
16122   }
16123 
16124   return SDValue();
16125 }
16126 
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  // Look through a bitcast so a mask built at a different element width
  // (e.g. a v2i64 constant masking v4i32) can still be recognized.
  SDValue RHS = peekThroughBitcast(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (N->getOpcode() != ISD::AND)
    return SDValue();

  // The mask operand must be an all-constant build_vector.
  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  // Split == number of sub-elements each RHS scalar is divided into;
  // returns the shuffle on success, a null SDValue on failure.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      // Undef mask elements may select anything; mark them undef (-1).
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // On big-endian targets the first sub-element holds the most
      // significant bits of the scalar constant.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits.lshrInPlace(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      // All-ones keeps the LHS sub-element (index i); all-zeros selects the
      // matching element of the zero vector (index i + NumSubElts). Any
      // other pattern cannot be expressed as a shuffle-with-zero.
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try the coarsest split first; finer splits only if the scalar width
  // divides evenly.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
16218 
16219 /// Visit a binary vector operation, like ADD.
16220 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
16221   assert(N->getValueType(0).isVector() &&
16222          "SimplifyVBinOp only works on vectors!");
16223 
16224   SDValue LHS = N->getOperand(0);
16225   SDValue RHS = N->getOperand(1);
16226   SDValue Ops[] = {LHS, RHS};
16227 
16228   // See if we can constant fold the vector operation.
16229   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
16230           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
16231     return Fold;
16232 
16233   // Try to convert a constant mask AND into a shuffle clear mask.
16234   if (SDValue Shuffle = XformToShuffleWithZero(N))
16235     return Shuffle;
16236 
16237   // Type legalization might introduce new shuffles in the DAG.
16238   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
16239   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
16240   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
16241       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
16242       LHS.getOperand(1).isUndef() &&
16243       RHS.getOperand(1).isUndef()) {
16244     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
16245     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
16246 
16247     if (SVN0->getMask().equals(SVN1->getMask())) {
16248       EVT VT = N->getValueType(0);
16249       SDValue UndefVector = LHS.getOperand(1);
16250       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
16251                                      LHS.getOperand(0), RHS.getOperand(0),
16252                                      N->getFlags());
16253       AddUsersToWorklist(N);
16254       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
16255                                   SVN0->getMask());
16256     }
16257   }
16258 
16259   return SDValue();
16260 }
16261 
16262 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
16263                                     SDValue N2) {
16264   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
16265 
16266   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
16267                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
16268 
16269   // If we got a simplified select_cc node back from SimplifySelectCC, then
16270   // break it down into a new SETCC node, and a new SELECT node, and then return
16271   // the SELECT node, since we were called with a SELECT node.
16272   if (SCC.getNode()) {
16273     // Check to see if we got a select_cc back (to turn into setcc/select).
16274     // Otherwise, just return whatever node we got back, like fabs.
16275     if (SCC.getOpcode() == ISD::SELECT_CC) {
16276       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
16277                                   N0.getValueType(),
16278                                   SCC.getOperand(0), SCC.getOperand(1),
16279                                   SCC.getOperand(4));
16280       AddToWorklist(SETCC.getNode());
16281       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
16282                            SCC.getOperand(2), SCC.getOperand(3));
16283     }
16284 
16285     return SCC;
16286   }
16287   return SDValue();
16288 }
16289 
16290 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
16291 /// being selected between, see if we can simplify the select.  Callers of this
16292 /// should assume that TheSelect is deleted if this returns true.  As such, they
16293 /// should return the appropriate thing (e.g. the node) back to the top-level of
16294 /// the DAG combiner loop to avoid it being looked at.
16295 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
16296                                     SDValue RHS) {
16297 
16298   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
16299   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
16300   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
16301     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
16302       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
16303       SDValue Sqrt = RHS;
16304       ISD::CondCode CC;
16305       SDValue CmpLHS;
16306       const ConstantFPSDNode *Zero = nullptr;
16307 
16308       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
16309         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
16310         CmpLHS = TheSelect->getOperand(0);
16311         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
16312       } else {
16313         // SELECT or VSELECT
16314         SDValue Cmp = TheSelect->getOperand(0);
16315         if (Cmp.getOpcode() == ISD::SETCC) {
16316           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
16317           CmpLHS = Cmp.getOperand(0);
16318           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
16319         }
16320       }
16321       if (Zero && Zero->isZero() &&
16322           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
16323           CC == ISD::SETULT || CC == ISD::SETLT)) {
16324         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
16325         CombineTo(TheSelect, Sqrt);
16326         return true;
16327       }
16328     }
16329   }
16330   // Cannot simplify select with vector condition
16331   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
16332 
16333   // If this is a select from two identical things, try to pull the operation
16334   // through the select.
16335   if (LHS.getOpcode() != RHS.getOpcode() ||
16336       !LHS.hasOneUse() || !RHS.hasOneUse())
16337     return false;
16338 
16339   // If this is a load and the token chain is identical, replace the select
16340   // of two loads with a load through a select of the address to load from.
16341   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
16342   // constants have been dropped into the constant pool.
16343   if (LHS.getOpcode() == ISD::LOAD) {
16344     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
16345     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
16346 
16347     // Token chains must be identical.
16348     if (LHS.getOperand(0) != RHS.getOperand(0) ||
16349         // Do not let this transformation reduce the number of volatile loads.
16350         LLD->isVolatile() || RLD->isVolatile() ||
16351         // FIXME: If either is a pre/post inc/dec load,
16352         // we'd need to split out the address adjustment.
16353         LLD->isIndexed() || RLD->isIndexed() ||
16354         // If this is an EXTLOAD, the VT's must match.
16355         LLD->getMemoryVT() != RLD->getMemoryVT() ||
16356         // If this is an EXTLOAD, the kind of extension must match.
16357         (LLD->getExtensionType() != RLD->getExtensionType() &&
16358          // The only exception is if one of the extensions is anyext.
16359          LLD->getExtensionType() != ISD::EXTLOAD &&
16360          RLD->getExtensionType() != ISD::EXTLOAD) ||
16361         // FIXME: this discards src value information.  This is
16362         // over-conservative. It would be beneficial to be able to remember
16363         // both potential memory locations.  Since we are discarding
16364         // src value info, don't do the transformation if the memory
16365         // locations are not in the default address space.
16366         LLD->getPointerInfo().getAddrSpace() != 0 ||
16367         RLD->getPointerInfo().getAddrSpace() != 0 ||
16368         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
16369                                       LLD->getBasePtr().getValueType()))
16370       return false;
16371 
16372     // Check that the select condition doesn't reach either load.  If so,
16373     // folding this will induce a cycle into the DAG.  If not, this is safe to
16374     // xform, so create a select of the addresses.
16375     SDValue Addr;
16376     if (TheSelect->getOpcode() == ISD::SELECT) {
16377       SDNode *CondNode = TheSelect->getOperand(0).getNode();
16378       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
16379           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
16380         return false;
16381       // The loads must not depend on one another.
16382       if (LLD->isPredecessorOf(RLD) ||
16383           RLD->isPredecessorOf(LLD))
16384         return false;
16385       Addr = DAG.getSelect(SDLoc(TheSelect),
16386                            LLD->getBasePtr().getValueType(),
16387                            TheSelect->getOperand(0), LLD->getBasePtr(),
16388                            RLD->getBasePtr());
16389     } else {  // Otherwise SELECT_CC
16390       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
16391       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
16392 
16393       if ((LLD->hasAnyUseOfValue(1) &&
16394            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
16395           (RLD->hasAnyUseOfValue(1) &&
16396            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
16397         return false;
16398 
16399       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
16400                          LLD->getBasePtr().getValueType(),
16401                          TheSelect->getOperand(0),
16402                          TheSelect->getOperand(1),
16403                          LLD->getBasePtr(), RLD->getBasePtr(),
16404                          TheSelect->getOperand(4));
16405     }
16406 
16407     SDValue Load;
16408     // It is safe to replace the two loads if they have different alignments,
16409     // but the new load must be the minimum (most restrictive) alignment of the
16410     // inputs.
16411     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
16412     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
16413     if (!RLD->isInvariant())
16414       MMOFlags &= ~MachineMemOperand::MOInvariant;
16415     if (!RLD->isDereferenceable())
16416       MMOFlags &= ~MachineMemOperand::MODereferenceable;
16417     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
16418       // FIXME: Discards pointer and AA info.
16419       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
16420                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
16421                          MMOFlags);
16422     } else {
16423       // FIXME: Discards pointer and AA info.
16424       Load = DAG.getExtLoad(
16425           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
16426                                                   : LLD->getExtensionType(),
16427           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
16428           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
16429     }
16430 
16431     // Users of the select now use the result of the load.
16432     CombineTo(TheSelect, Load);
16433 
16434     // Users of the old loads now use the new load's chain.  We know the
16435     // old-load value is dead now.
16436     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
16437     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
16438     return true;
16439   }
16440 
16441   return false;
16442 }
16443 
16444 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
16445 /// bitwise 'and'.
16446 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
16447                                             SDValue N1, SDValue N2, SDValue N3,
16448                                             ISD::CondCode CC) {
16449   // If this is a select where the false operand is zero and the compare is a
16450   // check of the sign bit, see if we can perform the "gzip trick":
16451   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
16452   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
16453   EVT XType = N0.getValueType();
16454   EVT AType = N2.getValueType();
16455   if (!isNullConstant(N3) || !XType.bitsGE(AType))
16456     return SDValue();
16457 
16458   // If the comparison is testing for a positive value, we have to invert
16459   // the sign bit mask, so only do that transform if the target has a bitwise
16460   // 'and not' instruction (the invert is free).
16461   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
16462     // (X > -1) ? A : 0
16463     // (X >  0) ? X : 0 <-- This is canonical signed max.
16464     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
16465       return SDValue();
16466   } else if (CC == ISD::SETLT) {
16467     // (X <  0) ? A : 0
16468     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
16469     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
16470       return SDValue();
16471   } else {
16472     return SDValue();
16473   }
16474 
16475   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
16476   // constant.
16477   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
16478   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
16479   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
16480     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
16481     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
16482     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
16483     AddToWorklist(Shift.getNode());
16484 
16485     if (XType.bitsGT(AType)) {
16486       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16487       AddToWorklist(Shift.getNode());
16488     }
16489 
16490     if (CC == ISD::SETGT)
16491       Shift = DAG.getNOT(DL, Shift, AType);
16492 
16493     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16494   }
16495 
16496   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
16497   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
16498   AddToWorklist(Shift.getNode());
16499 
16500   if (XType.bitsGT(AType)) {
16501     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16502     AddToWorklist(Shift.getNode());
16503   }
16504 
16505   if (CC == ISD::SETGT)
16506     Shift = DAG.getNOT(DL, Shift, AType);
16507 
16508   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16509 }
16510 
16511 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
16512 /// where 'cond' is the comparison specified by CC.
16513 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
16514                                       SDValue N2, SDValue N3, ISD::CondCode CC,
16515                                       bool NotExtCompare) {
16516   // (x ? y : y) -> y.
16517   if (N2 == N3) return N2;
16518 
16519   EVT VT = N2.getValueType();
16520   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
16521   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
16522 
16523   // Determine if the condition we're dealing with is constant
16524   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
16525                               N0, N1, CC, DL, false);
16526   if (SCC.getNode()) AddToWorklist(SCC.getNode());
16527 
16528   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
16529     // fold select_cc true, x, y -> x
16530     // fold select_cc false, x, y -> y
16531     return !SCCC->isNullValue() ? N2 : N3;
16532   }
16533 
16534   // Check to see if we can simplify the select into an fabs node
16535   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
16536     // Allow either -0.0 or 0.0
16537     if (CFP->isZero()) {
16538       // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
16539       if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
16540           N0 == N2 && N3.getOpcode() == ISD::FNEG &&
16541           N2 == N3.getOperand(0))
16542         return DAG.getNode(ISD::FABS, DL, VT, N0);
16543 
16544       // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
16545       if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
16546           N0 == N3 && N2.getOpcode() == ISD::FNEG &&
16547           N2.getOperand(0) == N3)
16548         return DAG.getNode(ISD::FABS, DL, VT, N3);
16549     }
16550   }
16551 
16552   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
16553   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
16554   // in it.  This is a win when the constant is not otherwise available because
16555   // it replaces two constant pool loads with one.  We only do this if the FP
16556   // type is known to be legal, because if it isn't, then we are before legalize
16557   // types an we want the other legalization to happen first (e.g. to avoid
16558   // messing with soft float) and if the ConstantFP is not legal, because if
16559   // it is legal, we may not need to store the FP constant in a constant pool.
16560   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
16561     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
16562       if (TLI.isTypeLegal(N2.getValueType()) &&
16563           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
16564                TargetLowering::Legal &&
16565            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
16566            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
16567           // If both constants have multiple uses, then we won't need to do an
16568           // extra load, they are likely around in registers for other users.
16569           (TV->hasOneUse() || FV->hasOneUse())) {
16570         Constant *Elts[] = {
16571           const_cast<ConstantFP*>(FV->getConstantFPValue()),
16572           const_cast<ConstantFP*>(TV->getConstantFPValue())
16573         };
16574         Type *FPTy = Elts[0]->getType();
16575         const DataLayout &TD = DAG.getDataLayout();
16576 
16577         // Create a ConstantArray of the two constants.
16578         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
16579         SDValue CPIdx =
16580             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
16581                                 TD.getPrefTypeAlignment(FPTy));
16582         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
16583 
16584         // Get the offsets to the 0 and 1 element of the array so that we can
16585         // select between them.
16586         SDValue Zero = DAG.getIntPtrConstant(0, DL);
16587         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
16588         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
16589 
16590         SDValue Cond = DAG.getSetCC(DL,
16591                                     getSetCCResultType(N0.getValueType()),
16592                                     N0, N1, CC);
16593         AddToWorklist(Cond.getNode());
16594         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
16595                                           Cond, One, Zero);
16596         AddToWorklist(CstOffset.getNode());
16597         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
16598                             CstOffset);
16599         AddToWorklist(CPIdx.getNode());
16600         return DAG.getLoad(
16601             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
16602             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
16603             Alignment);
16604       }
16605     }
16606 
16607   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
16608     return V;
16609 
16610   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
16611   // where y is has a single bit set.
16612   // A plaintext description would be, we can turn the SELECT_CC into an AND
16613   // when the condition can be materialized as an all-ones register.  Any
16614   // single bit-test can be materialized as an all-ones register with
16615   // shift-left and shift-right-arith.
16616   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
16617       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
16618     SDValue AndLHS = N0->getOperand(0);
16619     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16620     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
16621       // Shift the tested bit over the sign bit.
16622       const APInt &AndMask = ConstAndRHS->getAPIntValue();
16623       SDValue ShlAmt =
16624         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
16625                         getShiftAmountTy(AndLHS.getValueType()));
16626       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
16627 
16628       // Now arithmetic right shift it all the way over, so the result is either
16629       // all-ones, or zero.
16630       SDValue ShrAmt =
16631         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
16632                         getShiftAmountTy(Shl.getValueType()));
16633       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
16634 
16635       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
16636     }
16637   }
16638 
16639   // fold select C, 16, 0 -> shl C, 4
16640   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
16641       TLI.getBooleanContents(N0.getValueType()) ==
16642           TargetLowering::ZeroOrOneBooleanContent) {
16643 
16644     // If the caller doesn't want us to simplify this into a zext of a compare,
16645     // don't do it.
16646     if (NotExtCompare && N2C->isOne())
16647       return SDValue();
16648 
16649     // Get a SetCC of the condition
16650     // NOTE: Don't create a SETCC if it's not legal on this target.
16651     if (!LegalOperations ||
16652         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
16653       SDValue Temp, SCC;
16654       // cast from setcc result type to select result type
16655       if (LegalTypes) {
16656         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
16657                             N0, N1, CC);
16658         if (N2.getValueType().bitsLT(SCC.getValueType()))
16659           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
16660                                         N2.getValueType());
16661         else
16662           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
16663                              N2.getValueType(), SCC);
16664       } else {
16665         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
16666         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
16667                            N2.getValueType(), SCC);
16668       }
16669 
16670       AddToWorklist(SCC.getNode());
16671       AddToWorklist(Temp.getNode());
16672 
16673       if (N2C->isOne())
16674         return Temp;
16675 
16676       // shl setcc result by log2 n2c
16677       return DAG.getNode(
16678           ISD::SHL, DL, N2.getValueType(), Temp,
16679           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
16680                           getShiftAmountTy(Temp.getValueType())));
16681     }
16682   }
16683 
16684   // Check to see if this is an integer abs.
16685   // select_cc setg[te] X,  0,  X, -X ->
16686   // select_cc setgt    X, -1,  X, -X ->
16687   // select_cc setl[te] X,  0, -X,  X ->
16688   // select_cc setlt    X,  1, -X,  X ->
16689   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
16690   if (N1C) {
16691     ConstantSDNode *SubC = nullptr;
16692     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
16693          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
16694         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
16695       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
16696     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
16697               (N1C->isOne() && CC == ISD::SETLT)) &&
16698              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
16699       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
16700 
16701     EVT XType = N0.getValueType();
16702     if (SubC && SubC->isNullValue() && XType.isInteger()) {
16703       SDLoc DL(N0);
16704       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
16705                                   N0,
16706                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
16707                                          getShiftAmountTy(N0.getValueType())));
16708       SDValue Add = DAG.getNode(ISD::ADD, DL,
16709                                 XType, N0, Shift);
16710       AddToWorklist(Shift.getNode());
16711       AddToWorklist(Add.getNode());
16712       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
16713     }
16714   }
16715 
16716   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
16717   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
16718   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
16719   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
16720   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
16721   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
16722   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
16723   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
16724   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
16725     SDValue ValueOnZero = N2;
16726     SDValue Count = N3;
16727     // If the condition is NE instead of E, swap the operands.
16728     if (CC == ISD::SETNE)
16729       std::swap(ValueOnZero, Count);
16730     // Check if the value on zero is a constant equal to the bits in the type.
16731     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
16732       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
16733         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
16734         // legal, combine to just cttz.
16735         if ((Count.getOpcode() == ISD::CTTZ ||
16736              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
16737             N0 == Count.getOperand(0) &&
16738             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
16739           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
16740         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
16741         // legal, combine to just ctlz.
16742         if ((Count.getOpcode() == ISD::CTLZ ||
16743              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
16744             N0 == Count.getOperand(0) &&
16745             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
16746           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
16747       }
16748     }
16749   }
16750 
16751   return SDValue();
16752 }
16753 
16754 /// This is a stub for TargetLowering::SimplifySetCC.
16755 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
16756                                    ISD::CondCode Cond, const SDLoc &DL,
16757                                    bool foldBooleans) {
16758   TargetLowering::DAGCombinerInfo
16759     DagCombineInfo(DAG, Level, false, this);
16760   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
16761 }
16762 
16763 /// Given an ISD::SDIV node expressing a divide by constant, return
16764 /// a DAG expression to select that will generate the same value by multiplying
16765 /// by a magic number.
16766 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
16767 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
16768   // when optimising for minimum size, we don't want to expand a div to a mul
16769   // and a shift.
16770   if (DAG.getMachineFunction().getFunction()->optForMinSize())
16771     return SDValue();
16772 
16773   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16774   if (!C)
16775     return SDValue();
16776 
16777   // Avoid division by zero.
16778   if (C->isNullValue())
16779     return SDValue();
16780 
16781   std::vector<SDNode*> Built;
16782   SDValue S =
16783       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
16784 
16785   for (SDNode *N : Built)
16786     AddToWorklist(N);
16787   return S;
16788 }
16789 
16790 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
16791 /// DAG expression that will generate the same value by right shifting.
16792 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
16793   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16794   if (!C)
16795     return SDValue();
16796 
16797   // Avoid division by zero.
16798   if (C->isNullValue())
16799     return SDValue();
16800 
16801   std::vector<SDNode *> Built;
16802   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
16803 
16804   for (SDNode *N : Built)
16805     AddToWorklist(N);
16806   return S;
16807 }
16808 
16809 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
16810 /// expression that will generate the same value by multiplying by a magic
16811 /// number.
16812 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
16813 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
16814   // when optimising for minimum size, we don't want to expand a div to a mul
16815   // and a shift.
16816   if (DAG.getMachineFunction().getFunction()->optForMinSize())
16817     return SDValue();
16818 
16819   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16820   if (!C)
16821     return SDValue();
16822 
16823   // Avoid division by zero.
16824   if (C->isNullValue())
16825     return SDValue();
16826 
16827   std::vector<SDNode*> Built;
16828   SDValue S =
16829       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
16830 
16831   for (SDNode *N : Built)
16832     AddToWorklist(N);
16833   return S;
16834 }
16835 
16836 /// Determines the LogBase2 value for a non-null input value using the
16837 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
16838 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
16839   EVT VT = V.getValueType();
16840   unsigned EltBits = VT.getScalarSizeInBits();
16841   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
16842   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
16843   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
16844   return LogBase2;
16845 }
16846 
16847 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16848 /// For the reciprocal, we need to find the zero of the function:
16849 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
16850 ///     =>
16851 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
16852 ///     does not require additional intermediate precision]
16853 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
16854   if (Level >= AfterLegalizeDAG)
16855     return SDValue();
16856 
16857   // TODO: Handle half and/or extended types?
16858   EVT VT = Op.getValueType();
16859   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
16860     return SDValue();
16861 
16862   // If estimates are explicitly disabled for this function, we're done.
16863   MachineFunction &MF = DAG.getMachineFunction();
16864   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
16865   if (Enabled == TLI.ReciprocalEstimate::Disabled)
16866     return SDValue();
16867 
16868   // Estimates may be explicitly enabled for this type with a custom number of
16869   // refinement steps.
16870   int Iterations = TLI.getDivRefinementSteps(VT, MF);
16871   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
16872     AddToWorklist(Est.getNode());
16873 
16874     if (Iterations) {
16875       EVT VT = Op.getValueType();
16876       SDLoc DL(Op);
16877       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
16878 
16879       // Newton iterations: Est = Est + Est (1 - Arg * Est)
16880       for (int i = 0; i < Iterations; ++i) {
16881         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
16882         AddToWorklist(NewEst.getNode());
16883 
16884         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
16885         AddToWorklist(NewEst.getNode());
16886 
16887         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
16888         AddToWorklist(NewEst.getNode());
16889 
16890         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
16891         AddToWorklist(Est.getNode());
16892       }
16893     }
16894     return Est;
16895   }
16896 
16897   return SDValue();
16898 }
16899 
16900 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16901 /// For the reciprocal sqrt, we need to find the zero of the function:
16902 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
16903 ///     =>
16904 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
16905 /// As a result, we precompute A/2 prior to the iteration loop.
16906 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
16907                                          unsigned Iterations,
16908                                          SDNodeFlags Flags, bool Reciprocal) {
16909   EVT VT = Arg.getValueType();
16910   SDLoc DL(Arg);
16911   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
16912 
16913   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
16914   // this entire sequence requires only one FP constant.
16915   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
16916   AddToWorklist(HalfArg.getNode());
16917 
16918   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
16919   AddToWorklist(HalfArg.getNode());
16920 
16921   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
16922   for (unsigned i = 0; i < Iterations; ++i) {
16923     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
16924     AddToWorklist(NewEst.getNode());
16925 
16926     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
16927     AddToWorklist(NewEst.getNode());
16928 
16929     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
16930     AddToWorklist(NewEst.getNode());
16931 
16932     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
16933     AddToWorklist(Est.getNode());
16934   }
16935 
16936   // If non-reciprocal square root is requested, multiply the result by Arg.
16937   if (!Reciprocal) {
16938     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
16939     AddToWorklist(Est.getNode());
16940   }
16941 
16942   return Est;
16943 }
16944 
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
/// This variant needs two FP constants (-0.5 and -3.0) but, unlike the
/// one-constant variant, can fold the final multiply-by-Arg of a plain
/// square root into the last iteration.
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // This routine must enter the loop below to work correctly
  // when (Reciprocal == false): the Arg multiply for sqrt happens only on
  // the final loop iteration.
  assert(Iterations > 0);

  // Newton iterations for reciprocal square root:
  // E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
    AddToWorklist(AE.getNode());

    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
    AddToWorklist(AEE.getNode());

    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
    AddToWorklist(RHS.getNode());

    // When calculating a square root at the last iteration build:
    // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice a common subexpression: A * E is already available above)
    SDValue LHS;
    if (Reciprocal || (i + 1) < Iterations) {
      // RSQRT: LHS = (E * -0.5)
      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    } else {
      // SQRT: LHS = (A * E) * -0.5
      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
    }
    AddToWorklist(LHS.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}
16993 
16994 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
16995 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
16996 /// Op can be zero.
16997 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
16998                                            bool Reciprocal) {
16999   if (Level >= AfterLegalizeDAG)
17000     return SDValue();
17001 
17002   // TODO: Handle half and/or extended types?
17003   EVT VT = Op.getValueType();
17004   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
17005     return SDValue();
17006 
17007   // If estimates are explicitly disabled for this function, we're done.
17008   MachineFunction &MF = DAG.getMachineFunction();
17009   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
17010   if (Enabled == TLI.ReciprocalEstimate::Disabled)
17011     return SDValue();
17012 
17013   // Estimates may be explicitly enabled for this type with a custom number of
17014   // refinement steps.
17015   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
17016 
17017   bool UseOneConstNR = false;
17018   if (SDValue Est =
17019       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
17020                           Reciprocal)) {
17021     AddToWorklist(Est.getNode());
17022 
17023     if (Iterations) {
17024       Est = UseOneConstNR
17025             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
17026             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
17027 
17028       if (!Reciprocal) {
17029         // Unfortunately, Est is now NaN if the input was exactly 0.0.
17030         // Select out this case and force the answer to 0.0.
17031         EVT VT = Op.getValueType();
17032         SDLoc DL(Op);
17033 
17034         SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
17035         EVT CCVT = getSetCCResultType(VT);
17036         SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
17037         AddToWorklist(ZeroCmp.getNode());
17038 
17039         Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
17040                           ZeroCmp, FPZero, Est);
17041         AddToWorklist(Est.getNode());
17042       }
17043     }
17044     return Est;
17045   }
17046 
17047   return SDValue();
17048 }
17049 
17050 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
17051   return buildSqrtEstimateImpl(Op, Flags, true);
17052 }
17053 
17054 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
17055   return buildSqrtEstimateImpl(Op, Flags, false);
17056 }
17057 
/// Decompose Ptr into a base value plus a constant byte offset, additionally
/// reporting the underlying GlobalValue (GV) or constant-pool value (CV) when
/// the base is one of those. Returns true only when the base is a frame
/// index, which is known not to alias with anything but itself; for globals
/// and constant-pool entries it returns false (while still filling in
/// GV/CV and Offset) because the same underlying object may be represented
/// by multiple nodes.
static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
                           const GlobalValue *&GV, const void *&CV) {
  // Assume it is a primitive operation.
  Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;

  // If it's adding a simple constant then integrate the offset.
  if (Base.getOpcode() == ISD::ADD) {
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
      Base = Base.getOperand(0);
      Offset += C->getSExtValue();
    }
  }

  // Return the underlying GlobalValue, and update the Offset.  Return false
  // for GlobalAddressSDNode since the same GlobalAddress may be represented
  // by multiple nodes with different offsets.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
    GV = G->getGlobal();
    Offset += G->getOffset();
    return false;
  }

  // Return the underlying Constant value, and update the Offset.  Return false
  // for ConstantSDNodes since the same constant pool entry may be represented
  // by multiple nodes with different offsets.
  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
    CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
                                         : (const void *)C->getConstVal();
    Offset += C->getOffset();
    return false;
  }
  // If it's any of the following then it can't alias with anything but itself.
  return isa<FrameIndexSDNode>(Base);
}
17094 
/// Return true if there is any possibility that the two addresses overlap.
/// This applies a sequence of increasingly precise disambiguation checks
/// (base+offset matching, frame-index reasoning, alignment reasoning, and
/// finally IR-level alias analysis) and conservatively answers "may alias"
/// when none of them can prove otherwise.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
  // If they are the same then they must be aliases.
  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;

  // If they are both volatile then they cannot be reordered.
  if (Op0->isVolatile() && Op1->isVolatile()) return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load /store.
  if (Op0->isInvariant() && Op1->writeMem())
    return false;

  if (Op1->isInvariant() && Op0->writeMem())
    return false;

  // Access sizes in bytes.
  unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
  unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;

  // Check for BaseIndexOffset matching.
  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
  int64_t PtrDiff;
  // Same base/index with a known constant byte difference: alias iff the
  // byte ranges [0, NumBytes0) and [PtrDiff, PtrDiff + NumBytes1) overlap.
  if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
    return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));

  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
  // able to calculate their relative offset if at least one arises
  // from an alloca. However, these allocas cannot overlap and we
  // can infer there is no alias.
  if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
    if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
      // If the base are the same frame index but the we couldn't find a
      // constant offset, (indices are different) be conservative.
      if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
                     !MFI.isFixedObjectIndex(B->getIndex())))
        return false;
    }

  // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
  // modified to use BaseIndexOffset.

  // Gather base node and offset information.
  SDValue Base0, Base1;
  int64_t Offset0, Offset1;
  const GlobalValue *GV0, *GV1;
  const void *CV0, *CV1;
  bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(),
                                      Base0, Offset0, GV0, CV0);
  bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(),
                                      Base1, Offset1, GV1, CV1);

  // If they have the same base address, then check to see if they overlap.
  if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1)))
    return !((Offset0 + NumBytes0) <= Offset1 ||
             (Offset1 + NumBytes1) <= Offset0);

  // It is possible for different frame indices to alias each other, mostly
  // when tail call optimization reuses return address slots for arguments.
  // To catch this case, look up the actual index of frame indices to compute
  // the real alias relationship.
  if (IsFrameIndex0 && IsFrameIndex1) {
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    Offset0 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base0)->getIndex());
    Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
    return !((Offset0 + NumBytes0) <= Offset1 ||
             (Offset1 + NumBytes1) <= Offset0);
  }

  // Otherwise, if we know what the bases are, and they aren't identical, then
  // we know they cannot alias.
  if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1))
    return false;

  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
  // compared to the size and offset of the access, we may be able to prove they
  // do not alias. This check is conservative for now to catch cases created by
  // splitting vector types.
  int64_t SrcValOffset0 = Op0->getSrcValueOffset();
  int64_t SrcValOffset1 = Op1->getSrcValueOffset();
  unsigned OrigAlignment0 = Op0->getOriginalAlignment();
  unsigned OrigAlignment1 = Op1->getOriginalAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of similar
    // size. Return no alias.
    if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
        (OffAlign1 + NumBytes1) <= OffAlign0)
      return false;
  }

  // Decide whether to consult IR-level alias analysis; the command-line
  // flag overrides the subtarget's preference when given.
  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA &&
      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
    AliasResult AAResult =
        AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
17218 
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
/// If the walk exceeds the target's depth limit, gives up and reports the
/// original chain as the sole "alias" (i.e. no improvement).
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      // Too deep: give up and fall back to the original chain.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here.
      // (Two non-volatile loads can never alias for chaining purposes.)
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        // Too wide to be worth expanding: treat the token factor itself as
        // the alias.
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    case ISD::CopyFromReg:
      // Forward past CopyFromReg.
      Chains.push_back(Chain.getOperand(0));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}
17305 
17306 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
17307 /// (aliasing node.)
17308 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
17309   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
17310 
17311   // Accumulate all the aliases to this node.
17312   GatherAllAliases(N, OldChain, Aliases);
17313 
17314   // If no operands then chain to entry token.
17315   if (Aliases.size() == 0)
17316     return DAG.getEntryNode();
17317 
17318   // If a single operand then chain to it.  We don't need to revisit it.
17319   if (Aliases.size() == 1)
17320     return Aliases[0];
17321 
17322   // Construct a custom tailored token factor.
17323   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
17324 }
17325 
17326 // This function tries to collect a bunch of potentially interesting
17327 // nodes to improve the chains of, all at once. This might seem
17328 // redundant, as this function gets called when visiting every store
17329 // node, so why not let the work be done on each store as it's visited?
17330 //
17331 // I believe this is mainly important because MergeConsecutiveStores
17332 // is unable to deal with merging stores of different sizes, so unless
17333 // we improve the chains of all the potential candidates up-front
17334 // before running MergeConsecutiveStores, it might only see some of
17335 // the nodes that will eventually be candidates, and then not be able
17336 // to go from a partially-merged state to the desired final
17337 // fully-merged state.
17338 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
17339   // This holds the base pointer, index, and the offset in bytes from the base
17340   // pointer.
17341   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
17342 
17343   // We must have a base and an offset.
17344   if (!BasePtr.getBase().getNode())
17345     return false;
17346 
17347   // Do not handle stores to undef base pointers.
17348   if (BasePtr.getBase().isUndef())
17349     return false;
17350 
17351   SmallVector<StoreSDNode *, 8> ChainedStores;
17352   ChainedStores.push_back(St);
17353 
17354   // Walk up the chain and look for nodes with offsets from the same
17355   // base pointer. Stop when reaching an instruction with a different kind
17356   // or instruction which has a different base pointer.
17357   StoreSDNode *Index = St;
17358   while (Index) {
17359     // If the chain has more than one use, then we can't reorder the mem ops.
17360     if (Index != St && !SDValue(Index, 0)->hasOneUse())
17361       break;
17362 
17363     if (Index->isVolatile() || Index->isIndexed())
17364       break;
17365 
17366     // Find the base pointer and offset for this memory node.
17367     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
17368 
17369     // Check that the base pointer is the same as the original one.
17370     if (!BasePtr.equalBaseIndex(Ptr, DAG))
17371       break;
17372 
17373     // Walk up the chain to find the next store node, ignoring any
17374     // intermediate loads. Any other kind of node will halt the loop.
17375     SDNode *NextInChain = Index->getChain().getNode();
17376     while (true) {
17377       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
17378         // We found a store node. Use it for the next iteration.
17379         if (STn->isVolatile() || STn->isIndexed()) {
17380           Index = nullptr;
17381           break;
17382         }
17383         ChainedStores.push_back(STn);
17384         Index = STn;
17385         break;
17386       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
17387         NextInChain = Ldn->getChain().getNode();
17388         continue;
17389       } else {
17390         Index = nullptr;
17391         break;
17392       }
17393     } // end while
17394   }
17395 
17396   // At this point, ChainedStores lists all of the Store nodes
17397   // reachable by iterating up through chain nodes matching the above
17398   // conditions.  For each such store identified, try to find an
17399   // earlier chain to attach the store to which won't violate the
17400   // required ordering.
17401   bool MadeChangeToSt = false;
17402   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
17403 
17404   for (StoreSDNode *ChainedStore : ChainedStores) {
17405     SDValue Chain = ChainedStore->getChain();
17406     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
17407 
17408     if (Chain != BetterChain) {
17409       if (ChainedStore == St)
17410         MadeChangeToSt = true;
17411       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
17412     }
17413   }
17414 
17415   // Do all replacements after finding the replacements to make to avoid making
17416   // the chains more complicated by introducing new TokenFactors.
17417   for (auto Replacement : BetterChains)
17418     replaceStoreChain(Replacement.first, Replacement.second);
17419 
17420   return MadeChangeToSt;
17421 }
17422 
17423 /// This is the entry point for the file.
17424 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
17425                            CodeGenOpt::Level OptLevel) {
17426   /// This is the main entry point to this class.
17427   DAGCombiner(*this, AA, OptLevel).Run(Level);
17428 }
17429