1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/SelectionDAG.h"
28 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
29 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/KnownBits.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Target/TargetLowering.h"
41 #include "llvm/Target/TargetOptions.h"
42 #include "llvm/Target/TargetRegisterInfo.h"
43 #include "llvm/Target/TargetSubtargetInfo.h"
44 #include <algorithm>
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "dagcombine"
48 
// Statistics reported under -stats: counts of how often each major
// DAG-combine transformation fired during compilation.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
55 
56 namespace {
  // Hidden command-line knobs controlling the combiner's use of alias
  // analysis and its load-splitting/slicing transformations. These are
  // developer/debugging options, not user-facing flags.
  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner's use of IR alias analysis"));

  static cl::opt<bool>
    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
               cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
  // Debug-build only: restrict combiner alias analysis to a single function
  // by name, to help bisect miscompiles.
  static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
               cl::desc("Only use DAG-combiner alias analysis in this"
                        " function"));
#endif

  /// Hidden option to stress test load slicing, i.e., when this option
  /// is enabled, load slicing bypasses most of its profitability guards.
  static cl::opt<bool>
  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                    cl::desc("Bypass the profitability model of load "
                             "slicing"),
                    cl::init(false));

  static cl::opt<bool>
    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                      cl::desc("DAG combiner may split indexing from loads"));
83 
84 //------------------------------ DAGCombiner ---------------------------------//
85 
  /// Worklist-driven peephole optimizer for the SelectionDAG. Nodes are
  /// pulled off the worklist, handed to a node-kind-specific visit routine,
  /// and replaced by simpler equivalents; users of replaced nodes are
  /// re-queued so combining runs to a fixed point. Driven by Run().
  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    CombineLevel Level;         // Which legalization phase this run is in.
    CodeGenOpt::Level OptLevel; // Overall codegen optimization level.
    bool LegalOperations;       // True when running after operation legalization.
    bool LegalTypes;            // True when running after type legalization.
    bool ForCodeSize;           // True when the function is optimized for size.

    /// \brief Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// \brief Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// \brief Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      // Insert into the map first; only push onto the worklist if the node
      // was not already present, preserving the no-duplicates invariant.
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    // Widest store (in bits) whose type is legal for the target; computed
    // once in the constructor by scanning all simple value types.
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
                         SDValue ExtLoad, const SDLoc &DL,
                         ISD::NodeType ExtType);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitUADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitUSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCE(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertZext(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
                           SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans = true);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *N);

    SDValue GetDemandedBits(SDValue V, const APInt &Mask);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    /// Match "(X shl/srl V1) & V2" where V2 may not be present.
    bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;
      // Offset from the base ptr.
      int64_t OffsetFromBase;
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);


    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.  LoadedVT returns the type
    /// of the original loaded value.  NarrowLoad returns whether the load would
    /// need to be narrowed in order to match.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
                          bool &NarrowLoad);

    /// Helper function for MergeConsecutiveStores which merges the
    /// component store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for MergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store.
    /// \return True if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for MergeConsecutiveStores.
    /// Stores that may be merged are placed in StoreNodes.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// Candidate stores have indirect dependency through their
    /// operands. \return True if safe to merge
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return number of stores that were merged into a merged store (the
    /// affected nodes are stored as a prefix in \p StoreNodes).
    bool MergeConsecutiveStores(StoreSDNode *N);

    /// \brief Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
      ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();

      // Scan every simple value type once to find the widest legal store
      // width; used later when deciding how far stores can be merged.
      MaximumLegalStoreInBits = 0;
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits();
    }

    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      if (LHSTy.isVector())
        return LHSTy;
      auto &DL = DAG.getDataLayout();
      return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
                        : TLI.getPointerTy(DL);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }
  };
543 }
544 
545 
546 namespace {
547 /// This class is a DAGUpdateListener that removes any deleted
548 /// nodes from the worklist.
549 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
550   DAGCombiner &DC;
551 public:
552   explicit WorklistRemover(DAGCombiner &dc)
553     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
554 
555   void NodeDeleted(SDNode *N, SDNode *E) override {
556     DC.removeFromWorklist(N);
557   }
558 };
559 }
560 
561 //===----------------------------------------------------------------------===//
562 //  TargetLowering::DAGCombinerInfo implementation
563 //===----------------------------------------------------------------------===//
564 
565 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
566   ((DAGCombiner*)DC)->AddToWorklist(N);
567 }
568 
569 SDValue TargetLowering::DAGCombinerInfo::
570 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
571   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
572 }
573 
574 SDValue TargetLowering::DAGCombinerInfo::
575 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
576   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
577 }
578 
579 
580 SDValue TargetLowering::DAGCombinerInfo::
581 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
582   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
583 }
584 
585 void TargetLowering::DAGCombinerInfo::
586 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
587   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
588 }
589 
590 //===----------------------------------------------------------------------===//
591 // Helper Functions
592 //===----------------------------------------------------------------------===//
593 
594 void DAGCombiner::deleteAndRecombine(SDNode *N) {
595   removeFromWorklist(N);
596 
597   // If the operands of this node are only used by the node, they will now be
598   // dead. Make sure to re-visit them and recursively delete dead nodes.
599   for (const SDValue &Op : N->ops())
600     // For an operand generating multiple values, one of the values may
601     // become dead allowing further simplification (e.g. split index
602     // arithmetic from an indexed load).
603     if (Op->hasOneUse() || Op->getNumValues() > 1)
604       AddToWorklist(Op.getNode());
605 
606   DAG.DeleteNode(N);
607 }
608 
609 /// Return 1 if we can compute the negated form of the specified expression for
610 /// the same cost as the expression itself, or 2 if we can compute the negated
611 /// form more cheaply than the expression itself.
612 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
613                                const TargetLowering &TLI,
614                                const TargetOptions *Options,
615                                unsigned Depth = 0) {
616   // fneg is removable even if it has multiple uses.
617   if (Op.getOpcode() == ISD::FNEG) return 2;
618 
619   // Don't allow anything with multiple uses.
620   if (!Op.hasOneUse()) return 0;
621 
622   // Don't recurse exponentially.
623   if (Depth > 6) return 0;
624 
625   switch (Op.getOpcode()) {
626   default: return false;
627   case ISD::ConstantFP: {
628     if (!LegalOperations)
629       return 1;
630 
631     // Don't invert constant FP values after legalization unless the target says
632     // the negated constant is legal.
633     EVT VT = Op.getValueType();
634     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
635       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
636   }
637   case ISD::FADD:
638     // FIXME: determine better conditions for this xform.
639     if (!Options->UnsafeFPMath) return 0;
640 
641     // After operation legalization, it might not be legal to create new FSUBs.
642     if (LegalOperations &&
643         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
644       return 0;
645 
646     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
647     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
648                                     Options, Depth + 1))
649       return V;
650     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
651     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
652                               Depth + 1);
653   case ISD::FSUB:
654     // We can't turn -(A-B) into B-A when we honor signed zeros.
655     if (!Options->NoSignedZerosFPMath &&
656         !Op.getNode()->getFlags().hasNoSignedZeros())
657       return 0;
658 
659     // fold (fneg (fsub A, B)) -> (fsub B, A)
660     return 1;
661 
662   case ISD::FMUL:
663   case ISD::FDIV:
664     if (Options->HonorSignDependentRoundingFPMath()) return 0;
665 
666     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
667     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
668                                     Options, Depth + 1))
669       return V;
670 
671     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
672                               Depth + 1);
673 
674   case ISD::FP_EXTEND:
675   case ISD::FP_ROUND:
676   case ISD::FSIN:
677     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
678                               Depth + 1);
679   }
680 }
681 
/// If isNegatibleForFree returns true, return the newly negated expression.
/// \param Op the expression to negate; isNegatibleForFree must have approved
///        it at the same Depth, since this function asserts on cases the
///        cost check would have rejected.
/// \param Depth recursion depth; must mirror the Depth used in the preceding
///        isNegatibleForFree query (capped at 6 there).
/// NOTE: the switch below must be kept in sync with the one in
/// isNegatibleForFree -- the asserts rely on that correspondence.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  // Don't allow anything with multiple uses.
  assert(Op.hasOneUse() && "Unknown reuse!");

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    // Negate the constant directly by flipping its sign bit.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    // FIXME: determine better conditions for this xform.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    // Prefer negating whichever operand is free to negate.
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    // isNegatibleForFree guaranteed at least one operand is negatible, so
    // operand 1 must be it.
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    assert(!Options.HonorSignDependentRoundingFPMath());

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // These ops commute with negation: negate the operand and rebuild.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
      // FP_ROUND carries a second "trunc is exact" operand that must be
      // preserved on the rebuilt node.
      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));
  }
}
759 
760 // APInts must be the same size for most operations, this helper
761 // function zero extends the shorter of the pair so that they match.
762 // We provide an Offset so that we can create bitwidths that won't overflow.
763 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
764   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
765   LHS = LHS.zextOrSelf(Bits);
766   RHS = RHS.zextOrSelf(Bits);
767 }
768 
769 // Return true if this node is a setcc, or is a select_cc
770 // that selects between the target values used for true and false, making it
771 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
772 // the appropriate nodes based on the type of node we are checking. This
773 // simplifies life a bit for the callers.
774 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
775                                     SDValue &CC) const {
776   if (N.getOpcode() == ISD::SETCC) {
777     LHS = N.getOperand(0);
778     RHS = N.getOperand(1);
779     CC  = N.getOperand(2);
780     return true;
781   }
782 
783   if (N.getOpcode() != ISD::SELECT_CC ||
784       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
785       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
786     return false;
787 
788   if (TLI.getBooleanContents(N.getValueType()) ==
789       TargetLowering::UndefinedBooleanContent)
790     return false;
791 
792   LHS = N.getOperand(0);
793   RHS = N.getOperand(1);
794   CC  = N.getOperand(4);
795   return true;
796 }
797 
798 /// Return true if this is a SetCC-equivalent operation with only one use.
799 /// If this is true, it allows the users to invert the operation for free when
800 /// it is profitable to do so.
801 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
802   SDValue N0, N1, N2;
803   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
804     return true;
805   return false;
806 }
807 
808 // \brief Returns the SDNode if it is a constant float BuildVector
809 // or constant float.
810 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
811   if (isa<ConstantFPSDNode>(N))
812     return N.getNode();
813   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
814     return N.getNode();
815   return nullptr;
816 }
817 
818 // Determines if it is a constant integer or a build vector of constant
819 // integers (and undefs).
820 // Do not permit build vector implicit truncation.
821 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
822   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
823     return !(Const->isOpaque() && NoOpaques);
824   if (N.getOpcode() != ISD::BUILD_VECTOR)
825     return false;
826   unsigned BitWidth = N.getScalarValueSizeInBits();
827   for (const SDValue &Op : N->op_values()) {
828     if (Op.isUndef())
829       continue;
830     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
831     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
832         (Const->isOpaque() && NoOpaques))
833       return false;
834   }
835   return true;
836 }
837 
838 // Determines if it is a constant null integer or a splatted vector of a
839 // constant null integer (with no undefs).
840 // Build vector implicit truncation is not an issue for null values.
841 static bool isNullConstantOrNullSplatConstant(SDValue N) {
842   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
843     return Splat->isNullValue();
844   return false;
845 }
846 
847 // Determines if it is a constant integer of one or a splatted vector of a
848 // constant integer of one (with no undefs).
849 // Do not permit build vector implicit truncation.
850 static bool isOneConstantOrOneSplatConstant(SDValue N) {
851   unsigned BitWidth = N.getScalarValueSizeInBits();
852   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
853     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
854   return false;
855 }
856 
857 // Determines if it is a constant integer of all ones or a splatted vector of a
858 // constant integer of all ones (with no undefs).
859 // Do not permit build vector implicit truncation.
860 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
861   unsigned BitWidth = N.getScalarValueSizeInBits();
862   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
863     return Splat->isAllOnesValue() &&
864            Splat->getAPIntValue().getBitWidth() == BitWidth;
865   return false;
866 }
867 
868 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
869 // undef's.
870 static bool isAnyConstantBuildVector(const SDNode *N) {
871   return ISD::isBuildVectorOfConstantSDNodes(N) ||
872          ISD::isBuildVectorOfConstantFPSDNodes(N);
873 }
874 
/// Try to reassociate the commutative/associative operation Opc applied to
/// (N0, N1) so that constants can be folded together or pulled outward.
/// Returns the reassociated value, or a null SDValue if no reassociation
/// applies.  The two halves below are mirror images: the first handles a
/// nested op in N0, the second a nested op in N1.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        // Both operands are constants but they didn't fold: give up rather
        // than rebuild an equivalent expression.
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  if (N1.getOpcode() == Opc) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        // Note the (R, L) operand order passed to the constant folder.
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}
920 
/// Replace all NumTo result values of N with the corresponding values in To.
/// When AddTo is set, the replacement nodes and their users are pushed onto
/// the worklist.  N itself is deleted if the replacement left it unused.
/// Returns SDValue(N, 0), which callers hand back to Run() to signal that the
/// node was handled via CombineTo.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.1 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        To[0].getNode()->dump(&DAG);
        dbgs() << " and " << NumTo-1 << " other values\n");
  // Every replacement value must match the type of the value it replaces.
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
954 
/// Apply a simplification recorded by TargetLowering in TLO: replace all uses
/// of TLO.Old with TLO.New, update the worklist, and delete TLO.Old if the
/// replacement left it unused.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
972 
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
/// Demanded is the mask of bits that Op's users actually need; the target's
/// SimplifyDemandedBits records any replacement in TLO, which is then
/// committed here.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
995 
/// Rewire the uses of Load to ExtLoad, a wider (promoted) load of the same
/// memory: value uses get a TRUNCATE of ExtLoad back to Load's original type,
/// chain uses get ExtLoad's chain, and the now-dead Load is deleted.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Value result (0) -> truncated extended load; chain result (1) ->
  // extended load's chain.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
1012 
/// Promote Op to the wider type PVT so it can feed a promoted operation.
/// Loads are rebuilt as extending loads (and Replace is set so the caller can
/// rewire the old load's uses); Assert[SZ]ext and Constant nodes are extended
/// to match; everything else falls back to ANY_EXTEND when legal.  Returns a
/// null SDValue if the operand cannot be promoted.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // Non-extending loads pick whichever extension is legal for the target
    // (preferring zext); extending loads keep their existing extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Generic fallback: any-extend, but only if the target supports it.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1051 
1052 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1053   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1054     return SDValue();
1055   EVT OldVT = Op.getValueType();
1056   SDLoc DL(Op);
1057   bool Replace = false;
1058   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1059   if (!NewOp.getNode())
1060     return SDValue();
1061   AddToWorklist(NewOp.getNode());
1062 
1063   if (Replace)
1064     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1065   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1066                      DAG.getValueType(OldVT));
1067 }
1068 
1069 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1070   EVT OldVT = Op.getValueType();
1071   SDLoc DL(Op);
1072   bool Replace = false;
1073   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1074   if (!NewOp.getNode())
1075     return SDValue();
1076   AddToWorklist(NewOp.getNode());
1077 
1078   if (Replace)
1079     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1080   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1081 }
1082 
1083 /// Promote the specified integer binary operation if the target indicates it is
1084 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1085 /// i32 since i16 instructions are longer.
1086 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1087   if (!LegalOperations)
1088     return SDValue();
1089 
1090   EVT VT = Op.getValueType();
1091   if (VT.isVector() || !VT.isInteger())
1092     return SDValue();
1093 
1094   // If operation type is 'undesirable', e.g. i16 on x86, consider
1095   // promoting it.
1096   unsigned Opc = Op.getOpcode();
1097   if (TLI.isTypeDesirableForOp(Opc, VT))
1098     return SDValue();
1099 
1100   EVT PVT = VT;
1101   // Consult target whether it is a good idea to promote this operation and
1102   // what's the right type to promote it to.
1103   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1104     assert(PVT != VT && "Don't know what type to promote to!");
1105 
1106     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1107 
1108     bool Replace0 = false;
1109     SDValue N0 = Op.getOperand(0);
1110     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1111 
1112     bool Replace1 = false;
1113     SDValue N1 = Op.getOperand(1);
1114     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1115     SDLoc DL(Op);
1116 
1117     SDValue RV =
1118         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1119 
1120     // New replace instances of N0 and N1
1121     if (Replace0 && N0 && N0.getOpcode() != ISD::DELETED_NODE && NN0 &&
1122         NN0.getOpcode() != ISD::DELETED_NODE) {
1123       AddToWorklist(NN0.getNode());
1124       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1125     }
1126 
1127     if (Replace1 && N1 && N1.getOpcode() != ISD::DELETED_NODE && NN1 &&
1128         NN1.getOpcode() != ISD::DELETED_NODE) {
1129       AddToWorklist(NN1.getNode());
1130       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1131     }
1132 
1133     // Deal with Op being deleted.
1134     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1135       return RV;
1136   }
1137   return SDValue();
1138 }
1139 
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
/// Only the shifted value (operand 0) is promoted; the shift amount operand
/// is passed through unchanged.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // The promotion must match the shift's semantics: arithmetic shifts need
    // the sign bits preserved, logical shifts need the high bits zeroed, and
    // SHL doesn't care about the high bits.
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    // Shift in the wider type, then truncate the result back to VT.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1192 
1193 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1194   if (!LegalOperations)
1195     return SDValue();
1196 
1197   EVT VT = Op.getValueType();
1198   if (VT.isVector() || !VT.isInteger())
1199     return SDValue();
1200 
1201   // If operation type is 'undesirable', e.g. i16 on x86, consider
1202   // promoting it.
1203   unsigned Opc = Op.getOpcode();
1204   if (TLI.isTypeDesirableForOp(Opc, VT))
1205     return SDValue();
1206 
1207   EVT PVT = VT;
1208   // Consult target whether it is a good idea to promote this operation and
1209   // what's the right type to promote it to.
1210   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1211     assert(PVT != VT && "Don't know what type to promote to!");
1212     // fold (aext (aext x)) -> (aext x)
1213     // fold (aext (zext x)) -> (zext x)
1214     // fold (aext (sext x)) -> (sext x)
1215     DEBUG(dbgs() << "\nPromoting ";
1216           Op.getNode()->dump(&DAG));
1217     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1218   }
1219   return SDValue();
1220 }
1221 
/// Promote an unindexed integer load to a wider, target-preferred type when
/// profitable: the load is rebuilt as an extending load of the promoted type,
/// value uses are rewired through a TRUNCATE back to the original type, and
/// the old load is deleted.  Returns true if a promotion was performed.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // Non-extending loads pick whichever extension is legal for the target
    // (preferring zext); extending loads keep their existing extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Value result (0) -> truncated wide load; chain result (1) -> the wide
    // load's chain.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1272 
1273 /// \brief Recursively delete a node which has no uses and any operands for
1274 /// which it is the only use.
1275 ///
1276 /// Note that this both deletes the nodes and removes them from the worklist.
1277 /// It also adds any nodes who have had a user deleted to the worklist as they
1278 /// may now have only one use and subject to other combines.
1279 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1280   if (!N->use_empty())
1281     return false;
1282 
1283   SmallSetVector<SDNode *, 16> Nodes;
1284   Nodes.insert(N);
1285   do {
1286     N = Nodes.pop_back_val();
1287     if (!N)
1288       continue;
1289 
1290     if (N->use_empty()) {
1291       for (const SDValue &ChildN : N->op_values())
1292         Nodes.insert(ChildN.getNode());
1293 
1294       removeFromWorklist(N);
1295       DAG.DeleteNode(N);
1296     } else {
1297       AddToWorklist(N);
1298     }
1299   } while (!Nodes.empty());
1300   return true;
1301 }
1302 
1303 //===----------------------------------------------------------------------===//
1304 //  Main DAG Combiner implementation
1305 //===----------------------------------------------------------------------===//
1306 
/// Main combiner driver: seed the worklist with every node in the DAG, then
/// repeatedly pop nodes, (re)legalize them if needed, run the per-opcode
/// combine, and splice any replacement back into the DAG until the worklist
/// is empty.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Anything legalization touched gets revisited.
      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // Legalization replaced N entirely; nothing left to combine here.
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // A visitor may return a single value for a multi-value node only when
      // the node itself produces exactly one value.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1411 
/// Dispatch N to the combine routine for its opcode.  Returns the replacement
/// value produced by that routine, or a null SDValue (the default case) when
/// no combine applies to this opcode.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCE:             return visitSETCCE(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertZext:         return visitAssertZext(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1517 
1518 SDValue DAGCombiner::combine(SDNode *N) {
1519   SDValue RV = visit(N);
1520 
1521   // If nothing happened, try a target-specific DAG combine.
1522   if (!RV.getNode()) {
1523     assert(N->getOpcode() != ISD::DELETED_NODE &&
1524            "Node was deleted but visit returned NULL!");
1525 
1526     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1527         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1528 
1529       // Expose the DAG combiner to the target combiner impls.
1530       TargetLowering::DAGCombinerInfo
1531         DagCombineInfo(DAG, Level, false, this);
1532 
1533       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1534     }
1535   }
1536 
1537   // If nothing happened still, try promoting the operation.
1538   if (!RV.getNode()) {
1539     switch (N->getOpcode()) {
1540     default: break;
1541     case ISD::ADD:
1542     case ISD::SUB:
1543     case ISD::MUL:
1544     case ISD::AND:
1545     case ISD::OR:
1546     case ISD::XOR:
1547       RV = PromoteIntBinOp(SDValue(N, 0));
1548       break;
1549     case ISD::SHL:
1550     case ISD::SRA:
1551     case ISD::SRL:
1552       RV = PromoteIntShiftOp(SDValue(N, 0));
1553       break;
1554     case ISD::SIGN_EXTEND:
1555     case ISD::ZERO_EXTEND:
1556     case ISD::ANY_EXTEND:
1557       RV = PromoteExtend(SDValue(N, 0));
1558       break;
1559     case ISD::LOAD:
1560       if (PromoteLoad(SDValue(N, 0)))
1561         RV = SDValue(N, 0);
1562       break;
1563     }
1564   }
1565 
1566   // If N is a commutative binary node, try commuting it to enable more
1567   // sdisel CSE.
1568   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1569       N->getNumValues() == 1) {
1570     SDValue N0 = N->getOperand(0);
1571     SDValue N1 = N->getOperand(1);
1572 
1573     // Constant operands are canonicalized to RHS.
1574     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1575       SDValue Ops[] = {N1, N0};
1576       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1577                                             N->getFlags());
1578       if (CSENode)
1579         return SDValue(CSENode, 0);
1580     }
1581   }
1582 
1583   return RV;
1584 }
1585 
1586 /// Given a node, return its input chain if it has one, otherwise return a null
1587 /// sd operand.
1588 static SDValue getInputChainForNode(SDNode *N) {
1589   if (unsigned NumOps = N->getNumOperands()) {
1590     if (N->getOperand(0).getValueType() == MVT::Other)
1591       return N->getOperand(0);
1592     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1593       return N->getOperand(NumOps-1);
1594     for (unsigned i = 1; i < NumOps-1; ++i)
1595       if (N->getOperand(i).getValueType() == MVT::Other)
1596         return N->getOperand(i);
1597   }
1598   return SDValue();
1599 }
1600 
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Phase 1: flatten nested single-use token factors into one operand list,
  // dropping entry tokens and duplicates along the way.
  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        // Only merge a nested token factor if we are its sole user;
        // otherwise it must stay live and is treated as an ordinary operand.
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Phase 2: Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  // Seed the worklist with one entry per operand; NumLeftToConsider doubles
  // as the operand index while seeding.
  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // NOTE: this lambda shadows DAGCombiner::AddToWorklist for the remainder
  // of this function.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      // Find which original operand Op corresponds to.
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Breadth-first walk, capped at 1024 visited nodes to bound compile time.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting
      // another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // These pass the chain through operand 0.
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      // Memory operations expose their chain operand explicitly.
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        // Rebuild the operand list without operands that were pruned above.
        SmallVector<SDValue, 8> PrunedOps;
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1759 
1760 /// MERGE_VALUES can always be eliminated.
1761 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1762   WorklistRemover DeadNodes(*this);
1763   // Replacing results may cause a different MERGE_VALUES to suddenly
1764   // be CSE'd with N, and carry its uses with it. Iterate until no
1765   // uses remain, to ensure that the node can be safely deleted.
1766   // First add the users of this node to the work list so that they
1767   // can be tried again once they have new operands.
1768   AddUsersToWorklist(N);
1769   do {
1770     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1771       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1772   } while (!N->use_empty());
1773   deleteAndRecombine(N);
1774   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1775 }
1776 
1777 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1778 /// ConstantSDNode pointer else nullptr.
1779 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1780   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1781   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1782 }
1783 
1784 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1785   auto BinOpcode = BO->getOpcode();
1786   assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1787           BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1788           BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1789           BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1790           BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1791           BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1792           BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1793           BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1794           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1795          "Unexpected binary operator");
1796 
1797   // Bail out if any constants are opaque because we can't constant fold those.
1798   SDValue C1 = BO->getOperand(1);
1799   if (!isConstantOrConstantVector(C1, true) &&
1800       !isConstantFPBuildVectorOrConstantFP(C1))
1801     return SDValue();
1802 
1803   // Don't do this unless the old select is going away. We want to eliminate the
1804   // binary operator, not replace a binop with a select.
1805   // TODO: Handle ISD::SELECT_CC.
1806   SDValue Sel = BO->getOperand(0);
1807   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1808     return SDValue();
1809 
1810   SDValue CT = Sel.getOperand(1);
1811   if (!isConstantOrConstantVector(CT, true) &&
1812       !isConstantFPBuildVectorOrConstantFP(CT))
1813     return SDValue();
1814 
1815   SDValue CF = Sel.getOperand(2);
1816   if (!isConstantOrConstantVector(CF, true) &&
1817       !isConstantFPBuildVectorOrConstantFP(CF))
1818     return SDValue();
1819 
1820   // We have a select-of-constants followed by a binary operator with a
1821   // constant. Eliminate the binop by pulling the constant math into the select.
1822   // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1823   EVT VT = Sel.getValueType();
1824   SDLoc DL(Sel);
1825   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1826   assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
1827           isConstantFPBuildVectorOrConstantFP(NewCT)) &&
1828          "Failed to constant fold a binop with constant operands");
1829 
1830   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1831   assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
1832           isConstantFPBuildVectorOrConstantFP(NewCF)) &&
1833          "Failed to constant fold a binop with constant operands");
1834 
1835   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1836 }
1837 
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneConstantOrOneSplatConstant(N1)) {
      SDValue X = N0.getOperand(0);
      // Only do this when XOR and ZERO_EXTEND are legal (or we are still
      // pre-legalization) and X is a genuine i1 value.
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }
  }

  // If the LHS is a select of constants, pull the add into the select.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // Try to simplify based on which bits of the result are actually demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // Try the remaining add-like folds in both operand orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}
1976 
1977 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
1978   bool Masked = false;
1979 
1980   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
1981   while (true) {
1982     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
1983       V = V.getOperand(0);
1984       continue;
1985     }
1986 
1987     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
1988       Masked = true;
1989       V = V.getOperand(0);
1990       continue;
1991     }
1992 
1993     break;
1994   }
1995 
1996   // If this is not a carry, return.
1997   if (V.getResNo() != 1)
1998     return SDValue();
1999 
2000   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2001       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2002     return SDValue();
2003 
2004   // If the result is masked, then no matter what kind of bool it is we can
2005   // return. If it isn't, then we need to make sure the bool type is either 0 or
2006   // 1 and not other values.
2007   if (Masked ||
2008       TLI.getBooleanContents(V.getValueType()) ==
2009           TargetLoweringBase::ZeroOrOneBooleanContent)
2010     return V;
2011 
2012   return SDValue();
2013 }
2014 
2015 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2016   EVT VT = N0.getValueType();
2017   SDLoc DL(LocReference);
2018 
2019   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2020   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2021       isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
2022     return DAG.getNode(ISD::SUB, DL, VT, N0,
2023                        DAG.getNode(ISD::SHL, DL, VT,
2024                                    N1.getOperand(0).getOperand(1),
2025                                    N1.getOperand(1)));
2026 
2027   if (N1.getOpcode() == ISD::AND) {
2028     SDValue AndOp0 = N1.getOperand(0);
2029     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2030     unsigned DestBits = VT.getScalarSizeInBits();
2031 
2032     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2033     // and similar xforms where the inner op is either ~0 or 0.
2034     if (NumSignBits == DestBits &&
2035         isOneConstantOrOneSplatConstant(N1->getOperand(1)))
2036       return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2037   }
2038 
2039   // add (sext i1), X -> sub X, (zext i1)
2040   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2041       N0.getOperand(0).getValueType() == MVT::i1 &&
2042       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
2043     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2044     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2045   }
2046 
2047   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2048   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2049     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2050     if (TN->getVT() == MVT::i1) {
2051       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2052                                  DAG.getConstant(1, DL, VT));
2053       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2054     }
2055   }
2056 
2057   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2058   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
2059     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2060                        N0, N1.getOperand(0), N1.getOperand(2));
2061 
2062   // (add X, Carry) -> (addcarry X, 0, Carry)
2063   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2064     if (SDValue Carry = getAsCarry(TLI, N1))
2065       return DAG.getNode(ISD::ADDCARRY, DL,
2066                          DAG.getVTList(VT, Carry.getValueType()), N0,
2067                          DAG.getConstant(0, DL, VT), Carry);
2068 
2069   return SDValue();
2070 }
2071 
2072 SDValue DAGCombiner::visitADDC(SDNode *N) {
2073   SDValue N0 = N->getOperand(0);
2074   SDValue N1 = N->getOperand(1);
2075   EVT VT = N0.getValueType();
2076   SDLoc DL(N);
2077 
2078   // If the flag result is dead, turn this into an ADD.
2079   if (!N->hasAnyUseOfValue(1))
2080     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2081                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2082 
2083   // canonicalize constant to RHS.
2084   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2085   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2086   if (N0C && !N1C)
2087     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2088 
2089   // fold (addc x, 0) -> x + no carry out
2090   if (isNullConstant(N1))
2091     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2092                                         DL, MVT::Glue));
2093 
2094   // If it cannot overflow, transform into an add.
2095   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2096     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2097                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2098 
2099   return SDValue();
2100 }
2101 
2102 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2103   SDValue N0 = N->getOperand(0);
2104   SDValue N1 = N->getOperand(1);
2105   EVT VT = N0.getValueType();
2106   if (VT.isVector())
2107     return SDValue();
2108 
2109   EVT CarryVT = N->getValueType(1);
2110   SDLoc DL(N);
2111 
2112   // If the flag result is dead, turn this into an ADD.
2113   if (!N->hasAnyUseOfValue(1))
2114     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2115                      DAG.getUNDEF(CarryVT));
2116 
2117   // canonicalize constant to RHS.
2118   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2119   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2120   if (N0C && !N1C)
2121     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2122 
2123   // fold (uaddo x, 0) -> x + no carry out
2124   if (isNullConstant(N1))
2125     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2126 
2127   // If it cannot overflow, transform into an add.
2128   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2129     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2130                      DAG.getConstant(0, DL, CarryVT));
2131 
2132   if (SDValue Combined = visitUADDOLike(N0, N1, N))
2133     return Combined;
2134 
2135   if (SDValue Combined = visitUADDOLike(N1, N0, N))
2136     return Combined;
2137 
2138   return SDValue();
2139 }
2140 
2141 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2142   auto VT = N0.getValueType();
2143 
2144   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2145   // If Y + 1 cannot overflow.
2146   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2147     SDValue Y = N1.getOperand(0);
2148     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2149     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2150       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2151                          N1.getOperand(2));
2152   }
2153 
2154   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2155   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2156     if (SDValue Carry = getAsCarry(TLI, N1))
2157       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2158                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2159 
2160   return SDValue();
2161 }
2162 
2163 SDValue DAGCombiner::visitADDE(SDNode *N) {
2164   SDValue N0 = N->getOperand(0);
2165   SDValue N1 = N->getOperand(1);
2166   SDValue CarryIn = N->getOperand(2);
2167 
2168   // canonicalize constant to RHS
2169   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2170   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2171   if (N0C && !N1C)
2172     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2173                        N1, N0, CarryIn);
2174 
2175   // fold (adde x, y, false) -> (addc x, y)
2176   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2177     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2178 
2179   return SDValue();
2180 }
2181 
2182 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2183   SDValue N0 = N->getOperand(0);
2184   SDValue N1 = N->getOperand(1);
2185   SDValue CarryIn = N->getOperand(2);
2186   SDLoc DL(N);
2187 
2188   // canonicalize constant to RHS
2189   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2190   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2191   if (N0C && !N1C)
2192     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2193 
2194   // fold (addcarry x, y, false) -> (uaddo x, y)
2195   if (isNullConstant(CarryIn))
2196     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2197 
2198   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2199   if (isNullConstant(N0) && isNullConstant(N1)) {
2200     EVT VT = N0.getValueType();
2201     EVT CarryVT = CarryIn.getValueType();
2202     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2203     AddToWorklist(CarryExt.getNode());
2204     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2205                                     DAG.getConstant(1, DL, VT)),
2206                      DAG.getConstant(0, DL, CarryVT));
2207   }
2208 
2209   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2210     return Combined;
2211 
2212   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2213     return Combined;
2214 
2215   return SDValue();
2216 }
2217 
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry arguments is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   *
   * We are trying to get:
   *   (addcarry X, 0, (addcarry A, B, Z):Carry)
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    /**
     * The diamond shape we are matching (carry feeding two consumers):
     *
     *            (uaddo A, B)
     *             /       \
     *          Carry      Sum
     *            |          \
     *            | (addcarry *, 0, Z)
     *            |       /
     *             \   Carry
     *              |   /
     * (addcarry X, *, *)
     */
    if (Y.getOpcode() == ISD::UADDO &&
        CarryIn.getResNo() == 1 &&
        CarryIn.getOpcode() == ISD::ADDCARRY &&
        isNullConstant(CarryIn.getOperand(1)) &&
        CarryIn.getOperand(0) == Y.getValue(0)) {
      // Rebuild the inner addition as an addcarry consuming Z, then feed its
      // carry-out into the outer addcarry; carries now propagate linearly.
      auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
                              Y.getOperand(0), Y.getOperand(1),
                              CarryIn.getOperand(2));
      AddToWorklist(NewY.getNode());
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), N0.getValueType()),
                         NewY.getValue(1));
    }
  }

  return SDValue();
}
2265 
2266 // Since it may not be valid to emit a fold to zero for vector initializers
2267 // check if we can before folding.
2268 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2269                              SelectionDAG &DAG, bool LegalOperations,
2270                              bool LegalTypes) {
2271   if (!VT.isVector())
2272     return DAG.getConstant(0, DL, VT);
2273   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2274     return DAG.getConstant(0, DL, VT);
2275   return SDValue();
2276 }
2277 
// Combine an integer SUB node. Applies constant folding, algebraic
// identities (x-x, 0-x, A-(A-B), (A+B)-A, ...), and canonicalizations such
// as rewriting (sub x, c) into (add x, -c). The folds are ordered from
// cheapest/most general to most specialized; the order is significant.
// Returns the replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // If the sub feeds a select of constants, push the binop into the select.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  // Canonicalizing to ADD exposes further reassociation opportunities.
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // The remaining folds in this section handle negation: 0 - X.
  if (isNullConstantOrNullSplatConstant(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstantOrAllOnesSplatConstant(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  // Groups the two constants so they can be folded together.
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  // A sign-extended i1 is 0 or -1, so subtracting it is adding 0 or 1.
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}
2425 
2426 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2427   SDValue N0 = N->getOperand(0);
2428   SDValue N1 = N->getOperand(1);
2429   EVT VT = N0.getValueType();
2430   SDLoc DL(N);
2431 
2432   // If the flag result is dead, turn this into an SUB.
2433   if (!N->hasAnyUseOfValue(1))
2434     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2435                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2436 
2437   // fold (subc x, x) -> 0 + no borrow
2438   if (N0 == N1)
2439     return CombineTo(N, DAG.getConstant(0, DL, VT),
2440                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2441 
2442   // fold (subc x, 0) -> x + no borrow
2443   if (isNullConstant(N1))
2444     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2445 
2446   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2447   if (isAllOnesConstant(N0))
2448     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2449                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2450 
2451   return SDValue();
2452 }
2453 
2454 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2455   SDValue N0 = N->getOperand(0);
2456   SDValue N1 = N->getOperand(1);
2457   EVT VT = N0.getValueType();
2458   if (VT.isVector())
2459     return SDValue();
2460 
2461   EVT CarryVT = N->getValueType(1);
2462   SDLoc DL(N);
2463 
2464   // If the flag result is dead, turn this into an SUB.
2465   if (!N->hasAnyUseOfValue(1))
2466     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2467                      DAG.getUNDEF(CarryVT));
2468 
2469   // fold (usubo x, x) -> 0 + no borrow
2470   if (N0 == N1)
2471     return CombineTo(N, DAG.getConstant(0, DL, VT),
2472                      DAG.getConstant(0, DL, CarryVT));
2473 
2474   // fold (usubo x, 0) -> x + no borrow
2475   if (isNullConstant(N1))
2476     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2477 
2478   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2479   if (isAllOnesConstant(N0))
2480     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2481                      DAG.getConstant(0, DL, CarryVT));
2482 
2483   return SDValue();
2484 }
2485 
2486 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2487   SDValue N0 = N->getOperand(0);
2488   SDValue N1 = N->getOperand(1);
2489   SDValue CarryIn = N->getOperand(2);
2490 
2491   // fold (sube x, y, false) -> (subc x, y)
2492   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2493     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2494 
2495   return SDValue();
2496 }
2497 
2498 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2499   SDValue N0 = N->getOperand(0);
2500   SDValue N1 = N->getOperand(1);
2501   SDValue CarryIn = N->getOperand(2);
2502 
2503   // fold (subcarry x, y, false) -> (usubo x, y)
2504   if (isNullConstant(CarryIn))
2505     return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2506 
2507   return SDValue();
2508 }
2509 
// Combine an integer MUL node. Applies constant folding, canonicalizations
// (constant to RHS, multiply-by-power-of-two to shift), and reassociation.
// Splat-vector constants are handled alongside scalar constants. Returns the
// replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // Classify the operands: constant splat vector or scalar constant, and
  // whether the constant is opaque (must not be folded away).
  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // We require a splat of the entire scalar bit width for non-contiguous
  // bit patterns.
  bool IsFullSplat =
    ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat)
    return N0;

  // If the mul feeds a select of constants, push the binop into the select.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
      IsFullSplat) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SHL, DL, VT, N0,
                       DAG.getConstant(ConstValue1.logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
      IsFullSplat) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  // Only do this when the shifted constant (C3) still folds to a constant.
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  return SDValue();
}
2643 
2644 /// Return true if divmod libcall is available.
2645 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2646                                      const TargetLowering &TLI) {
2647   RTLIB::Libcall LC;
2648   EVT NodeType = Node->getValueType(0);
2649   if (!NodeType.isSimple())
2650     return false;
2651   switch (NodeType.getSimpleVT().SimpleTy) {
2652   default: return false; // No libcall for vector types.
2653   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2654   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2655   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2656   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2657   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2658   }
2659 
2660   return TLI.getLibcallName(LC) != nullptr;
2661 }
2662 
/// Issue divrem if both quotient and remainder are needed.
///
/// Given a DIV or REM node, scan the other users of its operands for a
/// matching REM/DIV (or existing DIVREM) with the same operands, and merge
/// the pair into a single DIVREM node (quotient = result 0, remainder =
/// result 1). Returns the combined node's value, or an empty SDValue if the
/// transform is not legal or not profitable.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  // OtherOpcode is the complementary operation (REM for a DIV and vice
  // versa) that we look for among the users.
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Walk the users of the first operand looking for nodes computing the
  // same division/remainder with identical operands.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE;) {
    SDNode *User = *UI++; // Advance before CombineTo may mutate the use list.
    if (User == Node || User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        // Create (or reuse) the DIVREM the first time a partner is found.
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Rewire the user to the appropriate DIVREM result value.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
2732 
2733 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
2734   SDValue N0 = N->getOperand(0);
2735   SDValue N1 = N->getOperand(1);
2736   EVT VT = N->getValueType(0);
2737   SDLoc DL(N);
2738 
2739   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2740     return DAG.getUNDEF(VT);
2741 
2742   // undef / X -> 0
2743   // undef % X -> 0
2744   if (N0.isUndef())
2745     return DAG.getConstant(0, DL, VT);
2746 
2747   return SDValue();
2748 }
2749 
// Combine a signed integer division. Applies constant folding, trivial
// divisor folds (1, -1), strength reduction to UDIV or shifts for
// power-of-two divisors, and target-provided SDIV lowering hooks. The fold
// ordering is significant. Returns the replacement value, or an empty
// SDValue if no fold applies.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);

  // Undef-operand simplifications shared with the other div/rem visitors.
  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
                                    (-N1C->getAPIntValue()).isPowerOf2())) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, DL, VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    // The SRL of the splatted sign bit yields abs2-1 for negative N0 and 0
    // otherwise, biasing the dividend so the SRA rounds toward zero.
    SDValue SRL =
        DAG.getNode(ISD::SRL, DL, VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
                  DAG.getConstant(lg2, DL,
                                  getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
2843 
// Combine an unsigned integer division. Applies constant folding,
// power-of-two strength reduction to logical right shifts, and the
// target-assisted magic-number expansion (BuildUDIV). Returns the
// replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;

  // Undef-operand simplifications shared with the other div/rem visitors.
  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  // Targets may check function attributes for size/speed trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
2914 
// Handles both ISD::SREM and ISD::UREM.
// Combine an integer remainder. Applies constant folding, power-of-two
// strength reduction to AND (unsigned), SREM->UREM when sign bits are
// known zero, and the X - (X/C)*C rewrite when X/C simplifies. Returns the
// replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;

  // Undef-operand simplifications shared with the div visitors.
  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // To avoid mangling nodes, this simplification requires that the combine()
  // call for the speculative DIV must not cause a DIVREM conversion.  We guard
  // against this by skipping the simplification if isIntDivCheap().  When
  // div is not cheap, combine will not return a DIVREM.  Regardless,
  // checking cheapness here makes sense since the simplification results in
  // fatter code.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
    AddToWorklist(Div.getNode());
    // Only proceed if combine() actually improved the speculative DIV.
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
             (OptimizedDiv.getOpcode() != ISD::SDIVREM));
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
2990 
2991 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2992   SDValue N0 = N->getOperand(0);
2993   SDValue N1 = N->getOperand(1);
2994   EVT VT = N->getValueType(0);
2995   SDLoc DL(N);
2996 
2997   // fold (mulhs x, 0) -> 0
2998   if (isNullConstant(N1))
2999     return N1;
3000   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3001   if (isOneConstant(N1)) {
3002     SDLoc DL(N);
3003     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3004                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3005                                        getShiftAmountTy(N0.getValueType())));
3006   }
3007   // fold (mulhs x, undef) -> 0
3008   if (N0.isUndef() || N1.isUndef())
3009     return DAG.getConstant(0, SDLoc(N), VT);
3010 
3011   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3012   // plus a shift.
3013   if (VT.isSimple() && !VT.isVector()) {
3014     MVT Simple = VT.getSimpleVT();
3015     unsigned SimpleSize = Simple.getSizeInBits();
3016     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3017     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3018       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3019       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3020       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3021       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3022             DAG.getConstant(SimpleSize, DL,
3023                             getShiftAmountTy(N1.getValueType())));
3024       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3025     }
3026   }
3027 
3028   return SDValue();
3029 }
3030 
3031 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3032   SDValue N0 = N->getOperand(0);
3033   SDValue N1 = N->getOperand(1);
3034   EVT VT = N->getValueType(0);
3035   SDLoc DL(N);
3036 
3037   // fold (mulhu x, 0) -> 0
3038   if (isNullConstant(N1))
3039     return N1;
3040   // fold (mulhu x, 1) -> 0
3041   if (isOneConstant(N1))
3042     return DAG.getConstant(0, DL, N0.getValueType());
3043   // fold (mulhu x, undef) -> 0
3044   if (N0.isUndef() || N1.isUndef())
3045     return DAG.getConstant(0, DL, VT);
3046 
3047   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3048   // plus a shift.
3049   if (VT.isSimple() && !VT.isVector()) {
3050     MVT Simple = VT.getSimpleVT();
3051     unsigned SimpleSize = Simple.getSizeInBits();
3052     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3053     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3054       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3055       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3056       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3057       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3058             DAG.getConstant(SimpleSize, DL,
3059                             getShiftAmountTy(N1.getValueType())));
3060       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3061     }
3062   }
3063 
3064   return SDValue();
3065 }
3066 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Returns
/// the replacement value if a simplification was made, or a null SDValue
/// otherwise. (N is expected to produce two results: value 0 is the "low"
/// computation and value 1 is the "high" computation.)
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    // Replace both results with the single low computation; the dead high
    // half simply goes away.
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  // NOTE(review): the low-half path above accepts Custom lowering
  // (isOperationLegalOrCustom) while this path requires full legality —
  // confirm the asymmetry is intentional.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    // Only take the combined form if it is genuinely a different node and is
    // legal (or we are before legalization).
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
3117 
3118 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3119   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3120     return Res;
3121 
3122   EVT VT = N->getValueType(0);
3123   SDLoc DL(N);
3124 
3125   // If the type is twice as wide is legal, transform the mulhu to a wider
3126   // multiply plus a shift.
3127   if (VT.isSimple() && !VT.isVector()) {
3128     MVT Simple = VT.getSimpleVT();
3129     unsigned SimpleSize = Simple.getSizeInBits();
3130     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3131     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3132       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3133       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3134       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3135       // Compute the high part as N1.
3136       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3137             DAG.getConstant(SimpleSize, DL,
3138                             getShiftAmountTy(Lo.getValueType())));
3139       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3140       // Compute the low part as N0.
3141       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3142       return CombineTo(N, Lo, Hi);
3143     }
3144   }
3145 
3146   return SDValue();
3147 }
3148 
3149 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3150   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3151     return Res;
3152 
3153   EVT VT = N->getValueType(0);
3154   SDLoc DL(N);
3155 
3156   // If the type is twice as wide is legal, transform the mulhu to a wider
3157   // multiply plus a shift.
3158   if (VT.isSimple() && !VT.isVector()) {
3159     MVT Simple = VT.getSimpleVT();
3160     unsigned SimpleSize = Simple.getSizeInBits();
3161     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3162     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3163       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3164       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3165       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3166       // Compute the high part as N1.
3167       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3168             DAG.getConstant(SimpleSize, DL,
3169                             getShiftAmountTy(Lo.getValueType())));
3170       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3171       // Compute the low part as N0.
3172       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3173       return CombineTo(N, Lo, Hi);
3174     }
3175   }
3176 
3177   return SDValue();
3178 }
3179 
3180 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3181   // (smulo x, 2) -> (saddo x, x)
3182   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3183     if (C2->getAPIntValue() == 2)
3184       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3185                          N->getOperand(0), N->getOperand(0));
3186 
3187   return SDValue();
3188 }
3189 
3190 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3191   // (umulo x, 2) -> (uaddo x, x)
3192   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3193     if (C2->getAPIntValue() == 2)
3194       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3195                          N->getOperand(0), N->getOperand(0));
3196 
3197   return SDValue();
3198 }
3199 
// Combine integer min/max-style nodes (the specific opcode is taken from N
// and reused unchanged in every rewrite below).
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold two constant operands: minmax(c1, c2) -> constant-folded result
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  return SDValue();
}
3223 
/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it by hoisting the shared inner operation outside (or pushing the
/// logic op inside), e.g. (op (f x), (f y)) -> (f (op x, y)). Returns the
/// simplified value, or a null SDValue if no fold applies.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    // Perform the logic op on the narrow/pre-bswap operands, then reapply
    // the shared outer operation once.
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  // Requires the second operand (shift amount / mask) to be identical.
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    // N1 has the same opcode as N0 (see the assert above), so both casts are
    // safe.
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // For XOR the shared operand must be replaced by zero in the result
      // (x ^ x == 0), which is only safe to materialize pre-type-legalization.
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    SVN0->getMask());
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // Same transform with the shared operand in position 0 instead of 1.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    SVN0->getMask());
      }
    }
  }

  return SDValue();
}
3376 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
/// IsAnd selects between the AND and OR forms of each fold; DL is the
/// location for any newly created nodes. Returns the folded value, or a null
/// SDValue if no fold applies.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  // Decompose both operands into setcc-equivalent (LHS, RHS, condcode)
  // triples; bail out if either operand is not setcc-like.
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares test against the same constant with the same predicate:
  // combine the tested values with a single OR or AND first.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullConstantOrNullSplatConstant(LR);
    bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
3495 
3496 /// This contains all DAGCombine rules which reduce two values combined by
3497 /// an And operation to a single value. This makes them reusable in the context
3498 /// of visitSELECT(). Rules involving constants are not included as
3499 /// visitSELECT() already handles those cases.
3500 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3501   EVT VT = N1.getValueType();
3502   SDLoc DL(N);
3503 
3504   // fold (and x, undef) -> 0
3505   if (N0.isUndef() || N1.isUndef())
3506     return DAG.getConstant(0, DL, VT);
3507 
3508   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3509     return V;
3510 
3511   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3512       VT.getSizeInBits() <= 64) {
3513     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3514       APInt ADDC = ADDI->getAPIntValue();
3515       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3516         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3517         // immediate for an add, but it is legal if its top c2 bits are set,
3518         // transform the ADD so the immediate doesn't need to be materialized
3519         // in a register.
3520         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3521           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3522                                              SRLI->getZExtValue());
3523           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3524             ADDC |= Mask;
3525             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3526               SDLoc DL0(N0);
3527               SDValue NewAdd =
3528                 DAG.getNode(ISD::ADD, DL0, VT,
3529                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3530               CombineTo(N0.getNode(), NewAdd);
3531               // Return N so it doesn't get rechecked!
3532               return SDValue(N, 0);
3533             }
3534           }
3535         }
3536       }
3537     }
3538   }
3539 
3540   // Reduce bit extract of low half of an integer to the narrower type.
3541   // (and (srl i64:x, K), KMask) ->
3542   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3543   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3544     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3545       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3546         unsigned Size = VT.getSizeInBits();
3547         const APInt &AndMask = CAnd->getAPIntValue();
3548         unsigned ShiftBits = CShift->getZExtValue();
3549 
3550         // Bail out, this node will probably disappear anyway.
3551         if (ShiftBits == 0)
3552           return SDValue();
3553 
3554         unsigned MaskBits = AndMask.countTrailingOnes();
3555         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3556 
3557         if (AndMask.isMask() &&
3558             // Required bits must not span the two halves of the integer and
3559             // must fit in the half size type.
3560             (ShiftBits + MaskBits <= Size / 2) &&
3561             TLI.isNarrowingProfitable(VT, HalfVT) &&
3562             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3563             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3564             TLI.isTruncateFree(VT, HalfVT) &&
3565             TLI.isZExtFree(HalfVT, VT)) {
3566           // The isNarrowingProfitable is to avoid regressions on PPC and
3567           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3568           // on downstream users of this. Those patterns could probably be
3569           // extended to handle extensions mixed in.
3570 
3571           SDValue SL(N0);
3572           assert(MaskBits <= Size);
3573 
3574           // Extracting the highest bit of the low half.
3575           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3576           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3577                                       N0.getOperand(0));
3578 
3579           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3580           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3581           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3582           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3583           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3584         }
3585       }
3586     }
3587   }
3588 
3589   return SDValue();
3590 }
3591 
3592 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3593                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3594                                    bool &NarrowLoad) {
3595   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
3596 
3597   if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
3598     return false;
3599 
3600   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3601   LoadedVT = LoadN->getMemoryVT();
3602 
3603   if (ExtVT == LoadedVT &&
3604       (!LegalOperations ||
3605        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3606     // ZEXTLOAD will match without needing to change the size of the value being
3607     // loaded.
3608     NarrowLoad = false;
3609     return true;
3610   }
3611 
3612   // Do not change the width of a volatile load.
3613   if (LoadN->isVolatile())
3614     return false;
3615 
3616   // Do not generate loads of non-round integer types since these can
3617   // be expensive (and would be wrong if the type is not byte sized).
3618   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3619     return false;
3620 
3621   if (LegalOperations &&
3622       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3623     return false;
3624 
3625   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3626     return false;
3627 
3628   NarrowLoad = true;
3629   return true;
3630 }
3631 
3632 SDValue DAGCombiner::visitAND(SDNode *N) {
3633   SDValue N0 = N->getOperand(0);
3634   SDValue N1 = N->getOperand(1);
3635   EVT VT = N1.getValueType();
3636 
3637   // x & x --> x
3638   if (N0 == N1)
3639     return N0;
3640 
3641   // fold vector ops
3642   if (VT.isVector()) {
3643     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3644       return FoldedVOp;
3645 
3646     // fold (and x, 0) -> 0, vector edition
3647     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3648       // do not return N0, because undef node may exist in N0
3649       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3650                              SDLoc(N), N0.getValueType());
3651     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3652       // do not return N1, because undef node may exist in N1
3653       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3654                              SDLoc(N), N1.getValueType());
3655 
3656     // fold (and x, -1) -> x, vector edition
3657     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3658       return N1;
3659     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3660       return N0;
3661   }
3662 
3663   // fold (and c1, c2) -> c1&c2
3664   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3665   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3666   if (N0C && N1C && !N1C->isOpaque())
3667     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3668   // canonicalize constant to RHS
3669   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3670      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3671     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3672   // fold (and x, -1) -> x
3673   if (isAllOnesConstant(N1))
3674     return N0;
3675   // if (and x, c) is known to be zero, return 0
3676   unsigned BitWidth = VT.getScalarSizeInBits();
3677   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3678                                    APInt::getAllOnesValue(BitWidth)))
3679     return DAG.getConstant(0, SDLoc(N), VT);
3680 
3681   if (SDValue NewSel = foldBinOpIntoSelect(N))
3682     return NewSel;
3683 
3684   // reassociate and
3685   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3686     return RAND;
3687   // fold (and (or x, C), D) -> D if (C & D) == D
3688   if (N1C && N0.getOpcode() == ISD::OR)
3689     if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
3690       if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue()))
3691         return N1;
3692   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3693   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3694     SDValue N0Op0 = N0.getOperand(0);
3695     APInt Mask = ~N1C->getAPIntValue();
3696     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
3697     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
3698       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
3699                                  N0.getValueType(), N0Op0);
3700 
3701       // Replace uses of the AND with uses of the Zero extend node.
3702       CombineTo(N, Zext);
3703 
3704       // We actually want to replace all uses of the any_extend with the
3705       // zero_extend, to avoid duplicating things.  This will later cause this
3706       // AND to be folded.
3707       CombineTo(N0.getNode(), Zext);
3708       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3709     }
3710   }
3711   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
3712   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
3713   // already be zero by virtue of the width of the base type of the load.
3714   //
3715   // the 'X' node here can either be nothing or an extract_vector_elt to catch
3716   // more cases.
3717   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3718        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
3719        N0.getOperand(0).getOpcode() == ISD::LOAD &&
3720        N0.getOperand(0).getResNo() == 0) ||
3721       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
3722     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
3723                                          N0 : N0.getOperand(0) );
3724 
3725     // Get the constant (if applicable) the zero'th operand is being ANDed with.
3726     // This can be a pure constant or a vector splat, in which case we treat the
3727     // vector as a scalar and use the splat value.
3728     APInt Constant = APInt::getNullValue(1);
3729     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
3730       Constant = C->getAPIntValue();
3731     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
3732       APInt SplatValue, SplatUndef;
3733       unsigned SplatBitSize;
3734       bool HasAnyUndefs;
3735       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
3736                                              SplatBitSize, HasAnyUndefs);
3737       if (IsSplat) {
3738         // Undef bits can contribute to a possible optimisation if set, so
3739         // set them.
3740         SplatValue |= SplatUndef;
3741 
3742         // The splat value may be something like "0x00FFFFFF", which means 0 for
3743         // the first vector value and FF for the rest, repeating. We need a mask
3744         // that will apply equally to all members of the vector, so AND all the
3745         // lanes of the constant together.
3746         EVT VT = Vector->getValueType(0);
3747         unsigned BitWidth = VT.getScalarSizeInBits();
3748 
3749         // If the splat value has been compressed to a bitlength lower
3750         // than the size of the vector lane, we need to re-expand it to
3751         // the lane size.
3752         if (BitWidth > SplatBitSize)
3753           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
3754                SplatBitSize < BitWidth;
3755                SplatBitSize = SplatBitSize * 2)
3756             SplatValue |= SplatValue.shl(SplatBitSize);
3757 
3758         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
3759         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
3760         if (SplatBitSize % BitWidth == 0) {
3761           Constant = APInt::getAllOnesValue(BitWidth);
3762           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
3763             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
3764         }
3765       }
3766     }
3767 
3768     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
3769     // actually legal and isn't going to get expanded, else this is a false
3770     // optimisation.
3771     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
3772                                                     Load->getValueType(0),
3773                                                     Load->getMemoryVT());
3774 
3775     // Resize the constant to the same size as the original memory access before
3776     // extension. If it is still the AllOnesValue then this AND is completely
3777     // unneeded.
3778     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
3779 
3780     bool B;
3781     switch (Load->getExtensionType()) {
3782     default: B = false; break;
3783     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
3784     case ISD::ZEXTLOAD:
3785     case ISD::NON_EXTLOAD: B = true; break;
3786     }
3787 
3788     if (B && Constant.isAllOnesValue()) {
3789       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
3790       // preserve semantics once we get rid of the AND.
3791       SDValue NewLoad(Load, 0);
3792 
3793       // Fold the AND away. NewLoad may get replaced immediately.
3794       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
3795 
3796       if (Load->getExtensionType() == ISD::EXTLOAD) {
3797         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
3798                               Load->getValueType(0), SDLoc(Load),
3799                               Load->getChain(), Load->getBasePtr(),
3800                               Load->getOffset(), Load->getMemoryVT(),
3801                               Load->getMemOperand());
3802         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
3803         if (Load->getNumValues() == 3) {
3804           // PRE/POST_INC loads have 3 values.
3805           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
3806                            NewLoad.getValue(2) };
3807           CombineTo(Load, To, 3, true);
3808         } else {
3809           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
3810         }
3811       }
3812 
3813       return SDValue(N, 0); // Return N so it doesn't get rechecked!
3814     }
3815   }
3816 
3817   // fold (and (load x), 255) -> (zextload x, i8)
3818   // fold (and (extload x, i16), 255) -> (zextload x, i8)
3819   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
3820   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
3821                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
3822                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
3823     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
3824     LoadSDNode *LN0 = HasAnyExt
3825       ? cast<LoadSDNode>(N0.getOperand(0))
3826       : cast<LoadSDNode>(N0);
3827     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
3828         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
3829       auto NarrowLoad = false;
3830       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
3831       EVT ExtVT, LoadedVT;
3832       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
3833                            NarrowLoad)) {
3834         if (!NarrowLoad) {
3835           SDValue NewLoad =
3836             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3837                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
3838                            LN0->getMemOperand());
3839           AddToWorklist(N);
3840           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
3841           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3842         } else {
3843           EVT PtrType = LN0->getOperand(1).getValueType();
3844 
3845           unsigned Alignment = LN0->getAlignment();
3846           SDValue NewPtr = LN0->getBasePtr();
3847 
3848           // For big endian targets, we need to add an offset to the pointer
3849           // to load the correct bytes.  For little endian systems, we merely
3850           // need to read fewer bytes from the same pointer.
3851           if (DAG.getDataLayout().isBigEndian()) {
3852             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3853             unsigned EVTStoreBytes = ExtVT.getStoreSize();
3854             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3855             SDLoc DL(LN0);
3856             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
3857                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
3858             Alignment = MinAlign(Alignment, PtrOff);
3859           }
3860 
3861           AddToWorklist(NewPtr.getNode());
3862 
3863           SDValue Load = DAG.getExtLoad(
3864               ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
3865               LN0->getPointerInfo(), ExtVT, Alignment,
3866               LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
3867           AddToWorklist(N);
3868           CombineTo(LN0, Load, Load.getValue(1));
3869           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3870         }
3871       }
3872     }
3873   }
3874 
3875   if (SDValue Combined = visitANDLike(N0, N1, N))
3876     return Combined;
3877 
3878   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
3879   if (N0.getOpcode() == N1.getOpcode())
3880     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3881       return Tmp;
3882 
3883   // Masking the negated extension of a boolean is just the zero-extended
3884   // boolean:
3885   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
3886   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
3887   //
3888   // Note: the SimplifyDemandedBits fold below can make an information-losing
3889   // transform, and then we have no way to find this better fold.
3890   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
3891     ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
3892     SDValue SubRHS = N0.getOperand(1);
3893     if (SubLHS && SubLHS->isNullValue()) {
3894       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
3895           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3896         return SubRHS;
3897       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
3898           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3899         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
3900     }
3901   }
3902 
3903   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
3904   // fold (and (sra)) -> (and (srl)) when possible.
3905   if (SimplifyDemandedBits(SDValue(N, 0)))
3906     return SDValue(N, 0);
3907 
3908   // fold (zext_inreg (extload x)) -> (zextload x)
3909   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
3910     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3911     EVT MemVT = LN0->getMemoryVT();
3912     // If we zero all the possible extended bits, then we can turn this into
3913     // a zextload if we are running before legalize or the operation is legal.
3914     unsigned BitWidth = N1.getScalarValueSizeInBits();
3915     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3916                            BitWidth - MemVT.getScalarSizeInBits())) &&
3917         ((!LegalOperations && !LN0->isVolatile()) ||
3918          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3919       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3920                                        LN0->getChain(), LN0->getBasePtr(),
3921                                        MemVT, LN0->getMemOperand());
3922       AddToWorklist(N);
3923       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3924       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3925     }
3926   }
3927   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
3928   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
3929       N0.hasOneUse()) {
3930     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3931     EVT MemVT = LN0->getMemoryVT();
3932     // If we zero all the possible extended bits, then we can turn this into
3933     // a zextload if we are running before legalize or the operation is legal.
3934     unsigned BitWidth = N1.getScalarValueSizeInBits();
3935     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3936                            BitWidth - MemVT.getScalarSizeInBits())) &&
3937         ((!LegalOperations && !LN0->isVolatile()) ||
3938          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3939       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3940                                        LN0->getChain(), LN0->getBasePtr(),
3941                                        MemVT, LN0->getMemOperand());
3942       AddToWorklist(N);
3943       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3944       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3945     }
3946   }
3947   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
3948   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
3949     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
3950                                            N0.getOperand(1), false))
3951       return BSwap;
3952   }
3953 
3954   return SDValue();
3955 }
3956 
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // Only fire after legalization, so the BSWAP node built below is known to
  // be legal (checked right after) and won't be re-expanded.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
  // Canonicalize so the SHL half ends up in N0 and the SRL half in N1.
  // LookPassAnd0/1 record that a masking AND was already looked through on
  // the corresponding half; that matters for the high-bits check below.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // The mask outside the SHL must keep exactly byte 1 (0xff00).
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    // The mask outside the SRL must keep exactly byte 0 (0xff).
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After stripping the optional ANDs we need exactly one SHL and one SRL.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  // Both shift amounts must be the constant 8.
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  // i.e. the masking AND may sit inside the shift instead of outside it.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must be byte-swapping the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // The swapped halfword lands in the top 16 bits of the bswap result;
  // shift it back down when the type is wider than i16.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
4063 
4064 /// Return true if the specified node is an element that makes up a 32-bit
4065 /// packed halfword byteswap.
4066 /// ((x & 0x000000ff) << 8) |
4067 /// ((x & 0x0000ff00) >> 8) |
4068 /// ((x & 0x00ff0000) << 8) |
4069 /// ((x & 0xff000000) >> 8)
4070 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4071   if (!N.getNode()->hasOneUse())
4072     return false;
4073 
4074   unsigned Opc = N.getOpcode();
4075   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4076     return false;
4077 
4078   SDValue N0 = N.getOperand(0);
4079   unsigned Opc0 = N0.getOpcode();
4080   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4081     return false;
4082 
4083   ConstantSDNode *N1C = nullptr;
4084   // SHL or SRL: look upstream for AND mask operand
4085   if (Opc == ISD::AND)
4086     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4087   else if (Opc0 == ISD::AND)
4088     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4089   if (!N1C)
4090     return false;
4091 
4092   unsigned MaskByteOffset;
4093   switch (N1C->getZExtValue()) {
4094   default:
4095     return false;
4096   case 0xFF:       MaskByteOffset = 0; break;
4097   case 0xFF00:     MaskByteOffset = 1; break;
4098   case 0xFF0000:   MaskByteOffset = 2; break;
4099   case 0xFF000000: MaskByteOffset = 3; break;
4100   }
4101 
4102   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4103   if (Opc == ISD::AND) {
4104     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4105       // (x >> 8) & 0xff
4106       // (x >> 8) & 0xff0000
4107       if (Opc0 != ISD::SRL)
4108         return false;
4109       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4110       if (!C || C->getZExtValue() != 8)
4111         return false;
4112     } else {
4113       // (x << 8) & 0xff00
4114       // (x << 8) & 0xff000000
4115       if (Opc0 != ISD::SHL)
4116         return false;
4117       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4118       if (!C || C->getZExtValue() != 8)
4119         return false;
4120     }
4121   } else if (Opc == ISD::SHL) {
4122     // (x & 0xff) << 8
4123     // (x & 0xff0000) << 8
4124     if (MaskByteOffset != 0 && MaskByteOffset != 2)
4125       return false;
4126     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4127     if (!C || C->getZExtValue() != 8)
4128       return false;
4129   } else { // Opc == ISD::SRL
4130     // (x & 0xff00) >> 8
4131     // (x & 0xff000000) >> 8
4132     if (MaskByteOffset != 1 && MaskByteOffset != 3)
4133       return false;
4134     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4135     if (!C || C->getZExtValue() != 8)
4136       return false;
4137   }
4138 
4139   if (Parts[MaskByteOffset])
4140     return false;
4141 
4142   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4143   return true;
4144 }
4145 
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  // Only fire after legalization so the BSWAP (and rotate/shift fallback)
  // built below is known to be legal.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // Parts[i] records the node supplying byte i of the result. Each leaf
  // pattern fills exactly one distinct slot (isBSwapHWordElement rejects
  // duplicates), and all four slots must name the same source node.
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordElement(N00, Parts))
      return SDValue();

    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    SDValue N10 = N1.getOperand(0);
    if (!isBSwapHWordElement(N10, Parts))
      return SDValue();
    SDValue N11 = N1.getOperand(1);
    if (!isBSwapHWordElement(N11, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do  (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
4220 
4221 /// This contains all DAGCombine rules which reduce two values combined by
4222 /// an Or operation to a single value \see visitANDLike().
4223 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4224   EVT VT = N1.getValueType();
4225   SDLoc DL(N);
4226 
4227   // fold (or x, undef) -> -1
4228   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4229     return DAG.getAllOnesConstant(DL, VT);
4230 
4231   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4232     return V;
4233 
4234   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4235   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4236       // Don't increase # computations.
4237       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4238     // We can only do this xform if we know that bits from X that are set in C2
4239     // but not in C1 are already zero.  Likewise for Y.
4240     if (const ConstantSDNode *N0O1C =
4241         getAsNonOpaqueConstant(N0.getOperand(1))) {
4242       if (const ConstantSDNode *N1O1C =
4243           getAsNonOpaqueConstant(N1.getOperand(1))) {
4244         // We can only do this xform if we know that bits from X that are set in
4245         // C2 but not in C1 are already zero.  Likewise for Y.
4246         const APInt &LHSMask = N0O1C->getAPIntValue();
4247         const APInt &RHSMask = N1O1C->getAPIntValue();
4248 
4249         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4250             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4251           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4252                                   N0.getOperand(0), N1.getOperand(0));
4253           return DAG.getNode(ISD::AND, DL, VT, X,
4254                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4255         }
4256       }
4257     }
4258   }
4259 
4260   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4261   if (N0.getOpcode() == ISD::AND &&
4262       N1.getOpcode() == ISD::AND &&
4263       N0.getOperand(0) == N1.getOperand(0) &&
4264       // Don't increase # computations.
4265       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4266     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4267                             N0.getOperand(1), N1.getOperand(1));
4268     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4269   }
4270 
4271   return SDValue();
4272 }
4273 
// Combine an OR node. Tries constant folding, canonicalization, and a
// sequence of pattern-based folds (bswap idioms, rotates, load combining);
// the order of the attempts below determines which combine fires first.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        // Build a combined mask: for each lane, exactly one shuffle must
        // contribute a zero and the other the live element.
        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          // If the mask isn't legal as-is, try the commuted form before
          // giving up.
          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0.
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) {
    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) {
        if (SDValue COR =
                DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1))
          return DAG.getNode(
              ISD::AND, SDLoc(N), VT,
              DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
        // NOTE: gives up (rather than falling through) when the constant
        // fold fails, since the canonical form could not be built.
        return SDValue();
      }
    }
  }

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // Try to combine (or (load ...), (load ...)) into a single wide load.
  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
4439 
4440 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4441 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4442   if (Op.getOpcode() == ISD::AND) {
4443     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4444       Mask = Op.getOperand(1);
4445       Op = Op.getOperand(0);
4446     } else {
4447       return false;
4448     }
4449   }
4450 
4451   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4452     Shift = Op;
4453     return true;
4454   }
4455 
4456   return false;
4457 }
4458 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  //
  // MaskLoBits == 0 means Mask is all-ones (case [B]); otherwise Mask keeps
  // only the low MaskLoBits bits (case [A]).
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      if (NegC->getAPIntValue() == EltSize - 1) {
        // Strip the redundant masking of Neg and switch to condition [A].
        Neg = Neg.getOperand(0);
        MaskLoBits = Log2_64(EltSize);
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      if (PosC->getAPIntValue() == EltSize - 1)
        Pos = Pos.getOperand(0);

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
4561 
4562 // A subroutine of MatchRotate used once we have found an OR of two opposite
4563 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4564 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4565 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4566 // Neg with outer conversions stripped away.
4567 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4568                                        SDValue Neg, SDValue InnerPos,
4569                                        SDValue InnerNeg, unsigned PosOpcode,
4570                                        unsigned NegOpcode, const SDLoc &DL) {
4571   // fold (or (shl x, (*ext y)),
4572   //          (srl x, (*ext (sub 32, y)))) ->
4573   //   (rotl x, y) or (rotr x, (sub 32, y))
4574   //
4575   // fold (or (shl x, (*ext (sub 32, y))),
4576   //          (srl x, (*ext y))) ->
4577   //   (rotr x, y) or (rotl x, (sub 32, y))
4578   EVT VT = Shifted.getValueType();
4579   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4580     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4581     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4582                        HasPos ? Pos : Neg).getNode();
4583   }
4584 
4585   return nullptr;
4586 }
4587 
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].  Returns the replacement node on success, null otherwise.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
    uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
    // The two constant shift amounts must sum to the full element width for
    // the OR of the shifts to be a rotate.
    if ((LShVal + RShVal) != EltSizeInBits)
      return nullptr;

    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    // Each original mask only constrained the bits produced by its own shift;
    // the bits contributed by the opposite shift pass through unmasked, so OR
    // those bit positions into the mask before applying it to the rotate.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue Mask = DAG.getAllOnesConstant(DL, VT);

      if (LHSMask.getNode()) {
        APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask,
                                       DAG.getConstant(RHSBits, DL, VT)));
      }
      if (RHSMask.getNode()) {
        APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask,
                                       DAG.getConstant(LHSBits, DL, VT)));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  // Note: both amounts must be wrapped in a conversion (the kinds may differ)
  // for the peel to happen; otherwise the original amounts are matched as-is.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try both rotate directions: first with the shl amount as the "positive"
  // amount, then with the srl amount.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
4697 
4698 namespace {
4699 /// Represents known origin of an individual byte in load combine pattern. The
4700 /// value of the byte is either constant zero or comes from memory.
4701 struct ByteProvider {
4702   // For constant zero providers Load is set to nullptr. For memory providers
4703   // Load represents the node which loads the byte from memory.
4704   // ByteOffset is the offset of the byte in the value produced by the load.
4705   LoadSDNode *Load;
4706   unsigned ByteOffset;
4707 
4708   ByteProvider() : Load(nullptr), ByteOffset(0) {}
4709 
4710   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
4711     return ByteProvider(Load, ByteOffset);
4712   }
4713   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
4714 
4715   bool isConstantZero() const { return !Load; }
4716   bool isMemory() const { return Load; }
4717 
4718   bool operator==(const ByteProvider &Other) const {
4719     return Other.Load == Load && Other.ByteOffset == ByteOffset;
4720   }
4721 
4722 private:
4723   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
4724       : Load(Load), ByteOffset(ByteOffset) {}
4725 };
4726 
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For all the values except the root of the expression verifies that the value
/// has exactly one use and if it's not true return None. This way if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
                                                   unsigned Depth,
                                                   bool Root = false) {
  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
  if (Depth == 10)
    return None;

  // Only the root of the expression may have multiple uses; see above.
  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    // A byte of an OR is known only when exactly one side provides it and
    // the other side contributes constant zero at that position.
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    // Only whole-byte shift amounts preserve byte boundaries.
    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    // Bytes below the shift amount are the zeros shifted in; the remaining
    // bytes come from the shifted operand at the correspondingly lower index.
    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes beyond the narrow value are known zero only for zero-extension;
    // for sign- and any-extension they are unknown.
    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    // A byte swap simply mirrors the byte index.
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    if (L->isVolatile() || L->isIndexed())
      return None;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes past the memory width are known zero only for zero-extending
    // loads; for sign- or any-extending loads they are unknown.
    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  return None;
}
4824 } // namespace
4825 
4826 /// Match a pattern where a wide type scalar value is loaded by several narrow
4827 /// loads and combined by shifts and ors. Fold it into a single load or a load
4828 /// and a BSWAP if the targets supports it.
4829 ///
4830 /// Assuming little endian target:
4831 ///  i8 *a = ...
4832 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4833 /// =>
4834 ///  i32 val = *((i32)a)
4835 ///
4836 ///  i8 *a = ...
4837 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4838 /// =>
4839 ///  i32 val = BSWAP(*((i32)a))
4840 ///
4841 /// TODO: This rule matches complex patterns with OR node roots and doesn't
4842 /// interact well with the worklist mechanism. When a part of the pattern is
4843 /// updated (e.g. one of the loads) its direct users are put into the worklist,
4844 /// but the root node of the pattern which triggers the load combine is not
4845 /// necessarily a direct user of the changed node. For example, once the address
4846 /// of t28 load is reassociated load combine won't be triggered:
4847 ///             t25: i32 = add t4, Constant:i32<2>
4848 ///           t26: i64 = sign_extend t25
4849 ///        t27: i64 = add t2, t26
4850 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
4851 ///     t29: i32 = zero_extend t28
4852 ///   t32: i32 = shl t29, Constant:i8<8>
4853 /// t33: i32 = or t23, t32
4854 /// As a possible fix visitLoad can check if the load can be a part of a load
4855 /// combine pattern and add corresponding OR roots to the worklist.
4856 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
4857   assert(N->getOpcode() == ISD::OR &&
4858          "Can only match load combining against OR nodes");
4859 
4860   // Handles simple types only
4861   EVT VT = N->getValueType(0);
4862   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
4863     return SDValue();
4864   unsigned ByteWidth = VT.getSizeInBits() / 8;
4865 
4866   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4867   // Before legalize we can introduce too wide illegal loads which will be later
4868   // split into legal sized loads. This enables us to combine i64 load by i8
4869   // patterns to a couple of i32 loads on 32 bit targets.
4870   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
4871     return SDValue();
4872 
4873   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
4874     unsigned BW, unsigned i) { return i; };
4875   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
4876     unsigned BW, unsigned i) { return BW - i - 1; };
4877 
4878   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
4879   auto MemoryByteOffset = [&] (ByteProvider P) {
4880     assert(P.isMemory() && "Must be a memory byte provider");
4881     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
4882     assert(LoadBitWidth % 8 == 0 &&
4883            "can only analyze providers for individual bytes not bit");
4884     unsigned LoadByteWidth = LoadBitWidth / 8;
4885     return IsBigEndianTarget
4886             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
4887             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
4888   };
4889 
4890   Optional<BaseIndexOffset> Base;
4891   SDValue Chain;
4892 
4893   SmallSet<LoadSDNode *, 8> Loads;
4894   Optional<ByteProvider> FirstByteProvider;
4895   int64_t FirstOffset = INT64_MAX;
4896 
4897   // Check if all the bytes of the OR we are looking at are loaded from the same
4898   // base address. Collect bytes offsets from Base address in ByteOffsets.
4899   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
4900   for (unsigned i = 0; i < ByteWidth; i++) {
4901     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
4902     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
4903       return SDValue();
4904 
4905     LoadSDNode *L = P->Load;
4906     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
4907            "Must be enforced by calculateByteProvider");
4908     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
4909 
4910     // All loads must share the same chain
4911     SDValue LChain = L->getChain();
4912     if (!Chain)
4913       Chain = LChain;
4914     else if (Chain != LChain)
4915       return SDValue();
4916 
4917     // Loads must share the same base address
4918     BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr());
4919     int64_t ByteOffsetFromBase = 0;
4920     if (!Base)
4921       Base = Ptr;
4922     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
4923       return SDValue();
4924 
4925     // Calculate the offset of the current byte from the base address
4926     ByteOffsetFromBase += MemoryByteOffset(*P);
4927     ByteOffsets[i] = ByteOffsetFromBase;
4928 
4929     // Remember the first byte load
4930     if (ByteOffsetFromBase < FirstOffset) {
4931       FirstByteProvider = P;
4932       FirstOffset = ByteOffsetFromBase;
4933     }
4934 
4935     Loads.insert(L);
4936   }
4937   assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
4938          "memory, so there must be at least one load which produces the value");
4939   assert(Base && "Base address of the accessed memory location must be set");
4940   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
4941 
4942   // Check if the bytes of the OR we are looking at match with either big or
4943   // little endian value load
4944   bool BigEndian = true, LittleEndian = true;
4945   for (unsigned i = 0; i < ByteWidth; i++) {
4946     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
4947     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
4948     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
4949     if (!BigEndian && !LittleEndian)
4950       return SDValue();
4951   }
4952   assert((BigEndian != LittleEndian) && "should be either or");
4953   assert(FirstByteProvider && "must be set");
4954 
4955   // Ensure that the first byte is loaded from zero offset of the first load.
4956   // So the combined value can be loaded from the first load address.
4957   if (MemoryByteOffset(*FirstByteProvider) != 0)
4958     return SDValue();
4959   LoadSDNode *FirstLoad = FirstByteProvider->Load;
4960 
4961   // The node we are looking at matches with the pattern, check if we can
4962   // replace it with a single load and bswap if needed.
4963 
4964   // If the load needs byte swap check if the target supports it
4965   bool NeedsBswap = IsBigEndianTarget != BigEndian;
4966 
4967   // Before legalize we can introduce illegal bswaps which will be later
4968   // converted to an explicit bswap sequence. This way we end up with a single
4969   // load and byte shuffling instead of several loads and byte shuffling.
4970   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
4971     return SDValue();
4972 
4973   // Check that a load of the wide type is both allowed and fast on the target
4974   bool Fast = false;
4975   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
4976                                         VT, FirstLoad->getAddressSpace(),
4977                                         FirstLoad->getAlignment(), &Fast);
4978   if (!Allowed || !Fast)
4979     return SDValue();
4980 
4981   SDValue NewLoad =
4982       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
4983                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
4984 
4985   // Transfer chain users from old loads to the new load.
4986   for (LoadSDNode *L : Loads)
4987     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
4988 
4989   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
4990 }
4991 
/// Combine an XOR node: constant folding, undef handling, canonicalization,
/// setcc inversion, De Morgan transforms, abs matching, and bit tricks.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    // Only invert the condition if the inverted code is legal (or we are
    // still before legalization).
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL(N0);
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
                    DAG.getConstant(1, DL, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      // De Morgan: swap AND <-> OR and complement both operands.
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), DL, VT));
    }
    if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), DL, VT));
    }
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
      N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
      TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
      if (C->getAPIntValue() == (OpSizeInBits - 1))
        return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
5168 
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// Note: \p Amt is not referenced directly in this body; the shift amount is
/// taken from N->getOperand(1) when building the replacement nodes.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is copy/select. Enable this in other cases when we can prove it is
  // exactly profitable.
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
                 BinOpLHSVal->getOpcode() == ISD::SRA ||
                 BinOpLHSVal->getOpcode() == ISD::SRL;
  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal->getOpcode() == ISD::SELECT;

  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
      !isCopyOrSelect)
    return SDValue();

  // NOTE(review): bailing out when N has exactly ONE use looks inverted —
  // presumably the intent was to bail when the shift has additional uses.
  // Confirm against upstream history before changing.
  if (isCopyOrSelect && N->hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
5246 
5247 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5248   assert(N->getOpcode() == ISD::TRUNCATE);
5249   assert(N->getOperand(0).getOpcode() == ISD::AND);
5250 
5251   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5252   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5253     SDValue N01 = N->getOperand(0).getOperand(1);
5254     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5255       SDLoc DL(N);
5256       EVT TruncVT = N->getValueType(0);
5257       SDValue N00 = N->getOperand(0).getOperand(0);
5258       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5259       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5260       AddToWorklist(Trunc00.getNode());
5261       AddToWorklist(Trunc01.getNode());
5262       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5263     }
5264   }
5265 
5266   return SDValue();
5267 }
5268 
5269 SDValue DAGCombiner::visitRotate(SDNode *N) {
5270   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
5271   if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
5272       N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
5273     if (SDValue NewOp1 =
5274             distributeTruncateThroughAnd(N->getOperand(1).getNode()))
5275       return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
5276                          N->getOperand(0), NewOp1);
5277   }
5278   return SDValue();
5279 }
5280 
/// Combine an ISD::SHL node: constant folding, shift-of-shift merging,
/// distributing shifts over ext/add/mul, and demanded-bits simplification.
/// Returns the replacement value, or an empty SDValue if nothing applied.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        // Only valid when a true setcc result is all ones, so shifting the
        // mask left keeps exactly the setcc lanes selected.
        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstantOrNullSplatConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  // Simplify the operands using the bits actually demanded by N's users.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      SDLoc DL(N);
      APInt c1 = N0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      // Widen both amounts (plus an overflow bit) so the sum cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      APInt Sum = c1 + c2;
      if (Sum.uge(OpSizeInBits))
        return DAG.getConstant(0, DL, VT);

      return DAG.getNode(
          ISD::SHL, DL, VT, N0.getOperand(0),
          DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N0);
        APInt Sum = c1 + c2;
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        // Shifting back left by the same amount lets the zext be done last.
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  // The 'exact' flag guarantees the right shift drops no set bits.
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask of the bits that survive the original srl; shifted to match
        // whichever single residual shift we emit below.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    // Only commit if the shifted constant actually folded to a constant.
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  // Last resort: try to pull a binop through the shift.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}
5503 
/// Combine an ISD::SRA node: constant folding, sra-of-sra merging, turning
/// shl+sra pairs into sign_extend_inreg, and SRA->SRL when the sign bit is
/// known zero.  Returns the replacement value, or an empty SDValue.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> (sra c1, c2)
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
  // fold (sra x, c >= size(x)) -> undef
  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      SDLoc DL(N);
      APInt c1 = N0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      // Widen both amounts (plus an overflow bit) so the sum cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      // An sra by >= the bit width is equivalent to shifting by width-1
      // (the result is all sign bits), so clamp rather than produce undef.
      APInt Sum = c1 + c2;
      if (Sum.uge(OpSizeInBits))
        Sum = APInt(OpSizeInBits, OpSizeInBits - 1);

      return DAG.getNode(
          ISD::SRA, DL, VT, N0.getOperand(0),
          DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constanst of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Last resort: try to pull a binop through the shift.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
5657 
/// Combine an ISD::SRL node: constant folding, srl-of-srl merging, narrowing
/// through truncates/extends, ctlz tricks, load narrowing, and demanded-bits
/// simplification.  Returns the replacement value, or an empty SDValue.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (isNullConstantOrNullSplatConstant(N0))
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      SDLoc DL(N);
      APInt c1 = N0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      // Widen both amounts (plus an overflow bit) so the sum cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      APInt Sum = c1 + c2;
      if (Sum.uge(OpSizeInBits))
        return DAG.getConstant(0, DL, VT);

      return DAG.getNode(
          ISD::SRL, DL, VT, N0.getOperand(0),
          DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the bits that would have been zeroed by the wide srl but are
      // undef after the narrow srl + any_extend.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known;
    DAG.computeKnownBits(N0.getOperand(0), Known);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look pass the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
5864 
5865 SDValue DAGCombiner::visitABS(SDNode *N) {
5866   SDValue N0 = N->getOperand(0);
5867   EVT VT = N->getValueType(0);
5868 
5869   // fold (abs c1) -> c2
5870   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5871     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
5872   // fold (abs (abs x)) -> (abs x)
5873   if (N0.getOpcode() == ISD::ABS)
5874     return N0;
5875   // fold (abs x) -> x iff not-negative
5876   if (DAG.SignBitIsZero(N0))
5877     return N0;
5878   return SDValue();
5879 }
5880 
5881 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
5882   SDValue N0 = N->getOperand(0);
5883   EVT VT = N->getValueType(0);
5884 
5885   // fold (bswap c1) -> c2
5886   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5887     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
5888   // fold (bswap (bswap x)) -> x
5889   if (N0.getOpcode() == ISD::BSWAP)
5890     return N0->getOperand(0);
5891   return SDValue();
5892 }
5893 
5894 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
5895   SDValue N0 = N->getOperand(0);
5896   EVT VT = N->getValueType(0);
5897 
5898   // fold (bitreverse c1) -> c2
5899   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5900     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
5901   // fold (bitreverse (bitreverse x)) -> x
5902   if (N0.getOpcode() == ISD::BITREVERSE)
5903     return N0.getOperand(0);
5904   return SDValue();
5905 }
5906 
5907 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
5908   SDValue N0 = N->getOperand(0);
5909   EVT VT = N->getValueType(0);
5910 
5911   // fold (ctlz c1) -> c2
5912   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5913     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
5914   return SDValue();
5915 }
5916 
5917 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
5918   SDValue N0 = N->getOperand(0);
5919   EVT VT = N->getValueType(0);
5920 
5921   // fold (ctlz_zero_undef c1) -> c2
5922   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5923     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5924   return SDValue();
5925 }
5926 
5927 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
5928   SDValue N0 = N->getOperand(0);
5929   EVT VT = N->getValueType(0);
5930 
5931   // fold (cttz c1) -> c2
5932   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5933     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
5934   return SDValue();
5935 }
5936 
5937 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
5938   SDValue N0 = N->getOperand(0);
5939   EVT VT = N->getValueType(0);
5940 
5941   // fold (cttz_zero_undef c1) -> c2
5942   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5943     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5944   return SDValue();
5945 }
5946 
5947 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
5948   SDValue N0 = N->getOperand(0);
5949   EVT VT = N->getValueType(0);
5950 
5951   // fold (ctpop c1) -> c2
5952   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5953     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
5954   return SDValue();
5955 }
5956 
5957 
5958 /// \brief Generate Min/Max node
5959 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
5960                                    SDValue RHS, SDValue True, SDValue False,
5961                                    ISD::CondCode CC, const TargetLowering &TLI,
5962                                    SelectionDAG &DAG) {
5963   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
5964     return SDValue();
5965 
5966   switch (CC) {
5967   case ISD::SETOLT:
5968   case ISD::SETOLE:
5969   case ISD::SETLT:
5970   case ISD::SETLE:
5971   case ISD::SETULT:
5972   case ISD::SETULE: {
5973     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
5974     if (TLI.isOperationLegal(Opcode, VT))
5975       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5976     return SDValue();
5977   }
5978   case ISD::SETOGT:
5979   case ISD::SETOGE:
5980   case ISD::SETGT:
5981   case ISD::SETGE:
5982   case ISD::SETUGT:
5983   case ISD::SETUGE: {
5984     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
5985     if (TLI.isOperationLegal(Opcode, VT))
5986       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5987     return SDValue();
5988   }
5989   default:
5990     return SDValue();
5991   }
5992 }
5993 
5994 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
5995   SDValue Cond = N->getOperand(0);
5996   SDValue N1 = N->getOperand(1);
5997   SDValue N2 = N->getOperand(2);
5998   EVT VT = N->getValueType(0);
5999   EVT CondVT = Cond.getValueType();
6000   SDLoc DL(N);
6001 
6002   if (!VT.isInteger())
6003     return SDValue();
6004 
6005   auto *C1 = dyn_cast<ConstantSDNode>(N1);
6006   auto *C2 = dyn_cast<ConstantSDNode>(N2);
6007   if (!C1 || !C2)
6008     return SDValue();
6009 
6010   // Only do this before legalization to avoid conflicting with target-specific
6011   // transforms in the other direction (create a select from a zext/sext). There
6012   // is also a target-independent combine here in DAGCombiner in the other
6013   // direction for (select Cond, -1, 0) when the condition is not i1.
6014   if (CondVT == MVT::i1 && !LegalOperations) {
6015     if (C1->isNullValue() && C2->isOne()) {
6016       // select Cond, 0, 1 --> zext (!Cond)
6017       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
6018       if (VT != MVT::i1)
6019         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
6020       return NotCond;
6021     }
6022     if (C1->isNullValue() && C2->isAllOnesValue()) {
6023       // select Cond, 0, -1 --> sext (!Cond)
6024       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
6025       if (VT != MVT::i1)
6026         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
6027       return NotCond;
6028     }
6029     if (C1->isOne() && C2->isNullValue()) {
6030       // select Cond, 1, 0 --> zext (Cond)
6031       if (VT != MVT::i1)
6032         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
6033       return Cond;
6034     }
6035     if (C1->isAllOnesValue() && C2->isNullValue()) {
6036       // select Cond, -1, 0 --> sext (Cond)
6037       if (VT != MVT::i1)
6038         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
6039       return Cond;
6040     }
6041 
6042     // For any constants that differ by 1, we can transform the select into an
6043     // extend and add. Use a target hook because some targets may prefer to
6044     // transform in the other direction.
6045     if (TLI.convertSelectOfConstantsToMath()) {
6046       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
6047         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6048         if (VT != MVT::i1)
6049           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
6050         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
6051       }
6052       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
6053         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6054         if (VT != MVT::i1)
6055           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
6056         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
6057       }
6058     }
6059 
6060     return SDValue();
6061   }
6062 
6063   // fold (select Cond, 0, 1) -> (xor Cond, 1)
6064   // We can't do this reliably if integer based booleans have different contents
6065   // to floating point based booleans. This is because we can't tell whether we
6066   // have an integer-based boolean or a floating-point-based boolean unless we
6067   // can find the SETCC that produced it and inspect its operands. This is
6068   // fairly easy if C is the SETCC node, but it can potentially be
6069   // undiscoverable (or not reasonably discoverable). For example, it could be
6070   // in another basic block or it could require searching a complicated
6071   // expression.
6072   if (CondVT.isInteger() &&
6073       TLI.getBooleanContents(false, true) ==
6074           TargetLowering::ZeroOrOneBooleanContent &&
6075       TLI.getBooleanContents(false, false) ==
6076           TargetLowering::ZeroOrOneBooleanContent &&
6077       C1->isNullValue() && C2->isOne()) {
6078     SDValue NotCond =
6079         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
6080     if (VT.bitsEq(CondVT))
6081       return NotCond;
6082     return DAG.getZExtOrTrunc(NotCond, DL, VT);
6083   }
6084 
6085   return SDValue();
6086 }
6087 
/// Combine a SELECT node. The folds below are order-sensitive: earlier,
/// cheaper identities run before the constant-arm folds, the i1 boolean-logic
/// rewrites, the select-of-select normalizations, and finally the
/// SETCC-condition folds (min/max, SELECT_CC formation).
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ? N1 : N2;
  }
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  // These boolean-algebra folds only apply when condition and result are i1.
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence
      = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      // getNode may return a pre-existing node; a non-empty use list means the
      // inner select was already in the DAG, so the transform is free.
      SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
                                        N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
                                        N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
                                    N0, N1_0);
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
                             N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
                             N1_1, N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
                                   N0, N2_0);
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
                             N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
                             N1, N2_2);
      }
    }
  }

  // select (xor Cond, 1), X, Y -> select Cond, Y, X
  // For an i1 condition, xor with 1 is a logical NOT, so swap the arms.
  if (VT0 == MVT::i1) {
    if (N0->getOpcode() == ISD::XOR) {
      if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
        SDValue Cond0 = N0->getOperand(0);
        if (C->isOne())
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
                             Cond0, N2, N1);
      }
    }
  }

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath &&
        VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
                                                N0.getOperand(1), N1, N2, CC,
                                                TLI, DAG))
        return FMinMax;
    }

    // Prefer a combined SELECT_CC node when the target supports it (before
    // legalization "custom" is acceptable too; afterwards it must be legal).
    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(SDLoc(N), N0, N1, N2);
  }

  return SDValue();
}
6251 
6252 static
6253 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
6254   SDLoc DL(N);
6255   EVT LoVT, HiVT;
6256   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
6257 
6258   // Split the inputs.
6259   SDValue Lo, Hi, LL, LH, RL, RH;
6260   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
6261   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
6262 
6263   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
6264   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
6265 
6266   return std::make_pair(Lo, Hi);
6267 }
6268 
6269 // This function assumes all the vselect's arguments are CONCAT_VECTOR
6270 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
6271 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
6272   SDLoc DL(N);
6273   SDValue Cond = N->getOperand(0);
6274   SDValue LHS = N->getOperand(1);
6275   SDValue RHS = N->getOperand(2);
6276   EVT VT = N->getValueType(0);
6277   int NumElems = VT.getVectorNumElements();
6278   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
6279          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
6280          Cond.getOpcode() == ISD::BUILD_VECTOR);
6281 
6282   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
6283   // binary ones here.
6284   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
6285     return SDValue();
6286 
6287   // We're sure we have an even number of elements due to the
6288   // concat_vectors we have as arguments to vselect.
6289   // Skip BV elements until we find one that's not an UNDEF
6290   // After we find an UNDEF element, keep looping until we get to half the
6291   // length of the BV and see if all the non-undef nodes are the same.
6292   ConstantSDNode *BottomHalf = nullptr;
6293   for (int i = 0; i < NumElems / 2; ++i) {
6294     if (Cond->getOperand(i)->isUndef())
6295       continue;
6296 
6297     if (BottomHalf == nullptr)
6298       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6299     else if (Cond->getOperand(i).getNode() != BottomHalf)
6300       return SDValue();
6301   }
6302 
6303   // Do the same for the second half of the BuildVector
6304   ConstantSDNode *TopHalf = nullptr;
6305   for (int i = NumElems / 2; i < NumElems; ++i) {
6306     if (Cond->getOperand(i)->isUndef())
6307       continue;
6308 
6309     if (TopHalf == nullptr)
6310       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6311     else if (Cond->getOperand(i).getNode() != TopHalf)
6312       return SDValue();
6313   }
6314 
6315   assert(TopHalf && BottomHalf &&
6316          "One half of the selector was all UNDEFs and the other was all the "
6317          "same value. This should have been addressed before this function.");
6318   return DAG.getNode(
6319       ISD::CONCAT_VECTORS, DL, VT,
6320       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
6321       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
6322 }
6323 
/// Split a masked scatter whose mask comes from a SETCC, before type
/// legalization, when the stored data type would be split anyway.
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {

  // Only worthwhile before type legalization; afterwards the legalizer has
  // already decided how to handle the illegal types.
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Data  = MSC->getValue();
  SDLoc DL(N);

  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();
  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  // Split the stored data and the index vector into low/high halves.
  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  // NOTE(review): a single MMO sized by LoMemVT is shared by both half
  // scatters below; for a gather/scatter the actual addresses come from the
  // index vector, but confirm the shared size/pointer info is intended.
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MSC->getPointerInfo(),
                          MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
                          Alignment, MSC->getAAInfo(), MSC->getRanges());

  // Both halves start from the original chain; they are independent stores.
  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
  Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
                            DL, OpsLo, MMO);

  SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
  Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
                            DL, OpsHi, MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Join the two output chains so later users depend on both halves.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
6384 
6385 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
6386 
6387   if (Level >= AfterLegalizeTypes)
6388     return SDValue();
6389 
6390   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
6391   SDValue Mask = MST->getMask();
6392   SDValue Data  = MST->getValue();
6393   EVT VT = Data.getValueType();
6394   SDLoc DL(N);
6395 
6396   // If the MSTORE data type requires splitting and the mask is provided by a
6397   // SETCC, then split both nodes and its operands before legalization. This
6398   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6399   // and enables future optimizations (e.g. min/max pattern matching on X86).
6400   if (Mask.getOpcode() == ISD::SETCC) {
6401 
6402     // Check if any splitting is required.
6403     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6404         TargetLowering::TypeSplitVector)
6405       return SDValue();
6406 
6407     SDValue MaskLo, MaskHi, Lo, Hi;
6408     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6409 
6410     SDValue Chain = MST->getChain();
6411     SDValue Ptr   = MST->getBasePtr();
6412 
6413     EVT MemoryVT = MST->getMemoryVT();
6414     unsigned Alignment = MST->getOriginalAlignment();
6415 
6416     // if Alignment is equal to the vector size,
6417     // take the half of it for the second part
6418     unsigned SecondHalfAlignment =
6419       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
6420 
6421     EVT LoMemVT, HiMemVT;
6422     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6423 
6424     SDValue DataLo, DataHi;
6425     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6426 
6427     MachineMemOperand *MMO = DAG.getMachineFunction().
6428       getMachineMemOperand(MST->getPointerInfo(),
6429                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6430                            Alignment, MST->getAAInfo(), MST->getRanges());
6431 
6432     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
6433                             MST->isTruncatingStore(),
6434                             MST->isCompressingStore());
6435 
6436     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6437                                      MST->isCompressingStore());
6438 
6439     MMO = DAG.getMachineFunction().
6440       getMachineMemOperand(MST->getPointerInfo(),
6441                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
6442                            SecondHalfAlignment, MST->getAAInfo(),
6443                            MST->getRanges());
6444 
6445     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
6446                             MST->isTruncatingStore(),
6447                             MST->isCompressingStore());
6448 
6449     AddToWorklist(Lo.getNode());
6450     AddToWorklist(Hi.getNode());
6451 
6452     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6453   }
6454   return SDValue();
6455 }
6456 
6457 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
6458 
6459   if (Level >= AfterLegalizeTypes)
6460     return SDValue();
6461 
6462   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
6463   SDValue Mask = MGT->getMask();
6464   SDLoc DL(N);
6465 
6466   // If the MGATHER result requires splitting and the mask is provided by a
6467   // SETCC, then split both nodes and its operands before legalization. This
6468   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6469   // and enables future optimizations (e.g. min/max pattern matching on X86).
6470 
6471   if (Mask.getOpcode() != ISD::SETCC)
6472     return SDValue();
6473 
6474   EVT VT = N->getValueType(0);
6475 
6476   // Check if any splitting is required.
6477   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6478       TargetLowering::TypeSplitVector)
6479     return SDValue();
6480 
6481   SDValue MaskLo, MaskHi, Lo, Hi;
6482   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6483 
6484   SDValue Src0 = MGT->getValue();
6485   SDValue Src0Lo, Src0Hi;
6486   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6487 
6488   EVT LoVT, HiVT;
6489   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
6490 
6491   SDValue Chain = MGT->getChain();
6492   EVT MemoryVT = MGT->getMemoryVT();
6493   unsigned Alignment = MGT->getOriginalAlignment();
6494 
6495   EVT LoMemVT, HiMemVT;
6496   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6497 
6498   SDValue BasePtr = MGT->getBasePtr();
6499   SDValue Index = MGT->getIndex();
6500   SDValue IndexLo, IndexHi;
6501   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
6502 
6503   MachineMemOperand *MMO = DAG.getMachineFunction().
6504     getMachineMemOperand(MGT->getPointerInfo(),
6505                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6506                           Alignment, MGT->getAAInfo(), MGT->getRanges());
6507 
6508   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
6509   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
6510                             MMO);
6511 
6512   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
6513   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
6514                             MMO);
6515 
6516   AddToWorklist(Lo.getNode());
6517   AddToWorklist(Hi.getNode());
6518 
6519   // Build a factor node to remember that this load is independent of the
6520   // other one.
6521   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6522                       Hi.getValue(1));
6523 
6524   // Legalized the chain result - switch anything that used the old chain to
6525   // use the new one.
6526   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
6527 
6528   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6529 
6530   SDValue RetOps[] = { GatherRes, Chain };
6531   return DAG.getMergeValues(RetOps, DL);
6532 }
6533 
6534 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
6535 
6536   if (Level >= AfterLegalizeTypes)
6537     return SDValue();
6538 
6539   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
6540   SDValue Mask = MLD->getMask();
6541   SDLoc DL(N);
6542 
6543   // If the MLOAD result requires splitting and the mask is provided by a
6544   // SETCC, then split both nodes and its operands before legalization. This
6545   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6546   // and enables future optimizations (e.g. min/max pattern matching on X86).
6547 
6548   if (Mask.getOpcode() == ISD::SETCC) {
6549     EVT VT = N->getValueType(0);
6550 
6551     // Check if any splitting is required.
6552     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6553         TargetLowering::TypeSplitVector)
6554       return SDValue();
6555 
6556     SDValue MaskLo, MaskHi, Lo, Hi;
6557     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6558 
6559     SDValue Src0 = MLD->getSrc0();
6560     SDValue Src0Lo, Src0Hi;
6561     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6562 
6563     EVT LoVT, HiVT;
6564     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
6565 
6566     SDValue Chain = MLD->getChain();
6567     SDValue Ptr   = MLD->getBasePtr();
6568     EVT MemoryVT = MLD->getMemoryVT();
6569     unsigned Alignment = MLD->getOriginalAlignment();
6570 
6571     // if Alignment is equal to the vector size,
6572     // take the half of it for the second part
6573     unsigned SecondHalfAlignment =
6574       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
6575          Alignment/2 : Alignment;
6576 
6577     EVT LoMemVT, HiMemVT;
6578     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6579 
6580     MachineMemOperand *MMO = DAG.getMachineFunction().
6581     getMachineMemOperand(MLD->getPointerInfo(),
6582                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6583                          Alignment, MLD->getAAInfo(), MLD->getRanges());
6584 
6585     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
6586                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6587 
6588     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6589                                      MLD->isExpandingLoad());
6590 
6591     MMO = DAG.getMachineFunction().
6592     getMachineMemOperand(MLD->getPointerInfo(),
6593                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
6594                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
6595 
6596     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
6597                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6598 
6599     AddToWorklist(Lo.getNode());
6600     AddToWorklist(Hi.getNode());
6601 
6602     // Build a factor node to remember that this load is independent of the
6603     // other one.
6604     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6605                         Hi.getValue(1));
6606 
6607     // Legalized the chain result - switch anything that used the old chain to
6608     // use the new one.
6609     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
6610 
6611     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6612 
6613     SDValue RetOps[] = { LoadRes, Chain };
6614     return DAG.getMergeValues(RetOps, DL);
6615   }
6616   return SDValue();
6617 }
6618 
/// Combine a VSELECT node: fold trivial conditions, canonicalize integer
/// abs patterns, and reduce selects over concat_vectors.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDLoc DL(N);

  // fold (vselect C, X, X) -> X
  if (N1 == N2)
    return N1;

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // The compared value must be the positive arm, and the other arm must be
    // its negation (sub 0, X), for the pattern to be an abs.
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      // Prefer the dedicated ABS node when the target supports it.
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      // Otherwise expand: Y = sra(X, bits-1); abs = xor(add(X, Y), Y).
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  return SDValue();
}
6685 
// Combine a (select_cc lhs, rhs, truev, falsev, cc) node. Folds are tried in
// order: identical arms, a constant/simplified condition, operand-based
// simplification, and finally the generic select_cc patterns (min/max/abs...).
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0); // condition LHS
  SDValue N1 = N->getOperand(1); // condition RHS
  SDValue N2 = N->getOperand(2); // value selected when the condition is true
  SDValue N3 = N->getOperand(3); // value selected when the condition is false
  SDValue N4 = N->getOperand(4); // condition code operand
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant.
  // Note: foldBooleans is false here because the result feeds a select_cc,
  // not a boolean use.
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
                                  CC, SDLoc(N), false)) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent with DAG creation: no setcc node is created in this case.
      return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc using the simplified compare operands.
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                         SCC.getOperand(2));
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}
6727 
6728 SDValue DAGCombiner::visitSETCC(SDNode *N) {
6729   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
6730                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
6731                        SDLoc(N));
6732 }
6733 
6734 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
6735   SDValue LHS = N->getOperand(0);
6736   SDValue RHS = N->getOperand(1);
6737   SDValue Carry = N->getOperand(2);
6738   SDValue Cond = N->getOperand(3);
6739 
6740   // If Carry is false, fold to a regular SETCC.
6741   if (Carry.getOpcode() == ISD::CARRY_FALSE)
6742     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6743 
6744   return SDValue();
6745 }
6746 
6747 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
6748   SDValue LHS = N->getOperand(0);
6749   SDValue RHS = N->getOperand(1);
6750   SDValue Carry = N->getOperand(2);
6751   SDValue Cond = N->getOperand(3);
6752 
6753   // If Carry is false, fold to a regular SETCC.
6754   if (isNullConstant(Carry))
6755     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6756 
6757   return SDValue();
6758 }
6759 
6760 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
6761 /// a build_vector of constants.
6762 /// This function is called by the DAGCombiner when visiting sext/zext/aext
6763 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
6764 /// Vector extends are not folded if operations are legal; this is to
6765 /// avoid introducing illegal build_vector dag nodes.
6766 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
6767                                          SelectionDAG &DAG, bool LegalTypes,
6768                                          bool LegalOperations) {
6769   unsigned Opcode = N->getOpcode();
6770   SDValue N0 = N->getOperand(0);
6771   EVT VT = N->getValueType(0);
6772 
6773   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
6774          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6775          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
6776          && "Expected EXTEND dag node in input!");
6777 
6778   // fold (sext c1) -> c1
6779   // fold (zext c1) -> c1
6780   // fold (aext c1) -> c1
6781   if (isa<ConstantSDNode>(N0))
6782     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
6783 
6784   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
6785   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
6786   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
6787   EVT SVT = VT.getScalarType();
6788   if (!(VT.isVector() &&
6789       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
6790       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
6791     return nullptr;
6792 
6793   // We can fold this node into a build_vector.
6794   unsigned VTBits = SVT.getSizeInBits();
6795   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
6796   SmallVector<SDValue, 8> Elts;
6797   unsigned NumElts = VT.getVectorNumElements();
6798   SDLoc DL(N);
6799 
6800   for (unsigned i=0; i != NumElts; ++i) {
6801     SDValue Op = N0->getOperand(i);
6802     if (Op->isUndef()) {
6803       Elts.push_back(DAG.getUNDEF(SVT));
6804       continue;
6805     }
6806 
6807     SDLoc DL(Op);
6808     // Get the constant value and if needed trunc it to the size of the type.
6809     // Nodes like build_vector might have constants wider than the scalar type.
6810     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
6811     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
6812       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
6813     else
6814       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
6815   }
6816 
6817   return DAG.getBuildVector(VT, DL, Elts).getNode();
6818 }
6819 
6820 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
6821 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
6822 // transformation. Returns true if extension are possible and the above
6823 // mentioned transformation is profitable.
6824 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
6825                                     unsigned ExtOpc,
6826                                     SmallVectorImpl<SDNode *> &ExtendNodes,
6827                                     const TargetLowering &TLI) {
6828   bool HasCopyToRegUses = false;
6829   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
6830   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
6831                             UE = N0.getNode()->use_end();
6832        UI != UE; ++UI) {
6833     SDNode *User = *UI;
6834     if (User == N)
6835       continue;
6836     if (UI.getUse().getResNo() != N0.getResNo())
6837       continue;
6838     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
6839     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
6840       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
6841       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
6842         // Sign bits will be lost after a zext.
6843         return false;
6844       bool Add = false;
6845       for (unsigned i = 0; i != 2; ++i) {
6846         SDValue UseOp = User->getOperand(i);
6847         if (UseOp == N0)
6848           continue;
6849         if (!isa<ConstantSDNode>(UseOp))
6850           return false;
6851         Add = true;
6852       }
6853       if (Add)
6854         ExtendNodes.push_back(User);
6855       continue;
6856     }
6857     // If truncates aren't free and there are users we can't
6858     // extend, it isn't worthwhile.
6859     if (!isTruncFree)
6860       return false;
6861     // Remember if this value is live-out.
6862     if (User->getOpcode() == ISD::CopyToReg)
6863       HasCopyToRegUses = true;
6864   }
6865 
6866   if (HasCopyToRegUses) {
6867     bool BothLiveOut = false;
6868     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6869          UI != UE; ++UI) {
6870       SDUse &Use = UI.getUse();
6871       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
6872         BothLiveOut = true;
6873         break;
6874       }
6875     }
6876     if (BothLiveOut)
6877       // Both unextended and extended values are live out. There had better be
6878       // a good reason for the transformation.
6879       return ExtendNodes.size();
6880   }
6881   return true;
6882 }
6883 
6884 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
6885                                   SDValue Trunc, SDValue ExtLoad,
6886                                   const SDLoc &DL, ISD::NodeType ExtType) {
6887   // Extend SetCC uses if necessary.
6888   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
6889     SDNode *SetCC = SetCCs[i];
6890     SmallVector<SDValue, 4> Ops;
6891 
6892     for (unsigned j = 0; j != 2; ++j) {
6893       SDValue SOp = SetCC->getOperand(j);
6894       if (SOp == Trunc)
6895         Ops.push_back(ExtLoad);
6896       else
6897         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
6898     }
6899 
6900     Ops.push_back(SetCC->getOperand(2));
6901     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
6902   }
6903 }
6904 
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
/// Split ({s|z}ext (load x)) on an illegal-but-splittable vector type into a
/// concat of several smaller extloads of a legal type. Returns SDValue(N, 0)
/// on success (after replacing N and the original load), or an empty SDValue
/// if the combine does not apply.
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Bail out on extending/indexed/volatile loads, non-vector or
  // non-power-of-two destinations, and targets that don't want this.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  // Emit one extload per split chunk, advancing the pointer by Stride bytes.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Merge the chains of the split loads and concatenate their values.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  CombineTo(N0.getNode(), Trunc, NewChain);
  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
                  (ISD::NodeType)N->getOpcode());
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
7002 
7003 /// If we're narrowing or widening the result of a vector select and the final
7004 /// size is the same size as a setcc (compare) feeding the select, then try to
7005 /// apply the cast operation to the select's operands because matching vector
7006 /// sizes for a select condition and other operands should be more efficient.
7007 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
7008   unsigned CastOpcode = Cast->getOpcode();
7009   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
7010           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
7011           CastOpcode == ISD::FP_ROUND) &&
7012          "Unexpected opcode for vector select narrowing/widening");
7013 
7014   // We only do this transform before legal ops because the pattern may be
7015   // obfuscated by target-specific operations after legalization. Do not create
7016   // an illegal select op, however, because that may be difficult to lower.
7017   EVT VT = Cast->getValueType(0);
7018   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
7019     return SDValue();
7020 
7021   SDValue VSel = Cast->getOperand(0);
7022   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
7023       VSel.getOperand(0).getOpcode() != ISD::SETCC)
7024     return SDValue();
7025 
7026   // Does the setcc have the same vector size as the casted select?
7027   SDValue SetCC = VSel.getOperand(0);
7028   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
7029   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
7030     return SDValue();
7031 
7032   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
7033   SDValue A = VSel.getOperand(1);
7034   SDValue B = VSel.getOperand(2);
7035   SDValue CastA, CastB;
7036   SDLoc DL(Cast);
7037   if (CastOpcode == ISD::FP_ROUND) {
7038     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
7039     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
7040     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
7041   } else {
7042     CastA = DAG.getNode(CastOpcode, DL, VT, A);
7043     CastB = DAG.getNode(CastOpcode, DL, VT, B);
7044   }
7045   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
7046 }
7047 
/// Combine a SIGN_EXTEND node. Folds are attempted in a deliberate order:
/// constants, redundant extend pairs, trunc/sext pairs, load -> sextload
/// forms, logic-of-load forms, setcc-based forms, and finally sext -> zext
/// when the sign bit is known zero.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // Match Op's width to VT before sign-extending in-register.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, the fold is only worthwhile if the other users can
    // be rewritten too (SETCC users are collected for ExtendSetCCUses).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  // NOTE(review): the `!LegalOperations && isOperationLegal` conjunction below
  // looks intentional (pre-legalization profitability check) but is worth
  // confirming against the matching zext combine.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Sign-extend the constant operand to the wide type as well.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        return CombineTo(N, And); // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(DL, VT, N00, N01, CC);

      // If the desired elements are smaller or larger than the source
      // elements, we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVecType) {
        SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
        return DAG.getSExtOrTrunc(VsetCC, DL, VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
                                           : TLI.getConstTrueVal(DAG, VT, DL);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector()) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
7274 
7275 // isTruncateOf - If N is a truncate of some other value, return true, record
7276 // the value being truncated in Op and which of Op's bits are zero/one in Known.
7277 // This function computes KnownBits to avoid a duplicated call to
7278 // computeKnownBits in the caller.
7279 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
7280                          KnownBits &Known) {
7281   if (N->getOpcode() == ISD::TRUNCATE) {
7282     Op = N->getOperand(0);
7283     DAG.computeKnownBits(Op, Known);
7284     return true;
7285   }
7286 
7287   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
7288       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
7289     return false;
7290 
7291   SDValue Op0 = N->getOperand(0);
7292   SDValue Op1 = N->getOperand(1);
7293   assert(Op0.getValueType() == Op1.getValueType());
7294 
7295   if (isNullConstant(Op0))
7296     Op = Op1;
7297   else if (isNullConstant(Op1))
7298     Op = Op0;
7299   else
7300     return false;
7301 
7302   DAG.computeKnownBits(Op, Known);
7303 
7304   if (!(Known.Zero | 1).isAllOnesValue())
7305     return false;
7306 
7307   return true;
7308 }
7309 
7310 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
7311   SDValue N0 = N->getOperand(0);
7312   EVT VT = N->getValueType(0);
7313 
7314   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7315                                               LegalOperations))
7316     return SDValue(Res, 0);
7317 
7318   // fold (zext (zext x)) -> (zext x)
7319   // fold (zext (aext x)) -> (zext x)
7320   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7321     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
7322                        N0.getOperand(0));
7323 
7324   // fold (zext (truncate x)) -> (zext x) or
7325   //      (zext (truncate x)) -> (truncate x)
7326   // This is valid when the truncated bits of x are already zero.
7327   // FIXME: We should extend this to work for vectors too.
7328   SDValue Op;
7329   KnownBits Known;
7330   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
7331     APInt TruncatedBits =
7332       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
7333       APInt(Op.getValueSizeInBits(), 0) :
7334       APInt::getBitsSet(Op.getValueSizeInBits(),
7335                         N0.getValueSizeInBits(),
7336                         std::min(Op.getValueSizeInBits(),
7337                                  VT.getSizeInBits()));
7338     if (TruncatedBits.isSubsetOf(Known.Zero))
7339       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7340   }
7341 
7342   // fold (zext (truncate (load x))) -> (zext (smaller load x))
7343   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
7344   if (N0.getOpcode() == ISD::TRUNCATE) {
7345     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7346       SDNode *oye = N0.getOperand(0).getNode();
7347       if (NarrowLoad.getNode() != N0.getNode()) {
7348         CombineTo(N0.getNode(), NarrowLoad);
7349         // CombineTo deleted the truncate, if needed, but not what's under it.
7350         AddToWorklist(oye);
7351       }
7352       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7353     }
7354   }
7355 
7356   // fold (zext (truncate x)) -> (and x, mask)
7357   if (N0.getOpcode() == ISD::TRUNCATE) {
7358     // fold (zext (truncate (load x))) -> (zext (smaller load x))
7359     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
7360     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7361       SDNode *oye = N0.getOperand(0).getNode();
7362       if (NarrowLoad.getNode() != N0.getNode()) {
7363         CombineTo(N0.getNode(), NarrowLoad);
7364         // CombineTo deleted the truncate, if needed, but not what's under it.
7365         AddToWorklist(oye);
7366       }
7367       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7368     }
7369 
7370     EVT SrcVT = N0.getOperand(0).getValueType();
7371     EVT MinVT = N0.getValueType();
7372 
7373     // Try to mask before the extension to avoid having to generate a larger mask,
7374     // possibly over several sub-vectors.
7375     if (SrcVT.bitsLT(VT)) {
7376       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
7377                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
7378         SDValue Op = N0.getOperand(0);
7379         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7380         AddToWorklist(Op.getNode());
7381         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7382       }
7383     }
7384 
7385     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
7386       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
7387       AddToWorklist(Op.getNode());
7388       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7389     }
7390   }
7391 
7392   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
7393   // if either of the casts is not free.
7394   if (N0.getOpcode() == ISD::AND &&
7395       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7396       N0.getOperand(1).getOpcode() == ISD::Constant &&
7397       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7398                            N0.getValueType()) ||
7399        !TLI.isZExtFree(N0.getValueType(), VT))) {
7400     SDValue X = N0.getOperand(0).getOperand(0);
7401     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
7402     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7403     Mask = Mask.zext(VT.getSizeInBits());
7404     SDLoc DL(N);
7405     return DAG.getNode(ISD::AND, DL, VT,
7406                        X, DAG.getConstant(Mask, DL, VT));
7407   }
7408 
7409   // fold (zext (load x)) -> (zext (truncate (zextload x)))
7410   // Only generate vector extloads when 1) they're legal, and 2) they are
7411   // deemed desirable by the target.
7412   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7413       ((!LegalOperations && !VT.isVector() &&
7414         !cast<LoadSDNode>(N0)->isVolatile()) ||
7415        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
7416     bool DoXform = true;
7417     SmallVector<SDNode*, 4> SetCCs;
7418     if (!N0.hasOneUse())
7419       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
7420     if (VT.isVector())
7421       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7422     if (DoXform) {
7423       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7424       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7425                                        LN0->getChain(),
7426                                        LN0->getBasePtr(), N0.getValueType(),
7427                                        LN0->getMemOperand());
7428 
7429       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7430                                   N0.getValueType(), ExtLoad);
7431       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
7432       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
7433       return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked!
7434     }
7435   }
7436 
7437   // fold (zext (load x)) to multiple smaller zextloads.
7438   // Only on illegal but splittable vectors.
7439   if (SDValue ExtLoad = CombineExtLoad(N))
7440     return ExtLoad;
7441 
7442   // fold (zext (and/or/xor (load x), cst)) ->
7443   //      (and/or/xor (zextload x), (zext cst))
7444   // Unless (and (load x) cst) will match as a zextload already and has
7445   // additional users.
7446   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7447        N0.getOpcode() == ISD::XOR) &&
7448       isa<LoadSDNode>(N0.getOperand(0)) &&
7449       N0.getOperand(1).getOpcode() == ISD::Constant &&
7450       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
7451       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7452     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7453     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
7454       bool DoXform = true;
7455       SmallVector<SDNode*, 4> SetCCs;
7456       if (!N0.hasOneUse()) {
7457         if (N0.getOpcode() == ISD::AND) {
7458           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
7459           auto NarrowLoad = false;
7460           EVT LoadResultTy = AndC->getValueType(0);
7461           EVT ExtVT, LoadedVT;
7462           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
7463                                NarrowLoad))
7464             DoXform = false;
7465         }
7466         if (DoXform)
7467           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
7468                                             ISD::ZERO_EXTEND, SetCCs, TLI);
7469       }
7470       if (DoXform) {
7471         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
7472                                          LN0->getChain(), LN0->getBasePtr(),
7473                                          LN0->getMemoryVT(),
7474                                          LN0->getMemOperand());
7475         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7476         Mask = Mask.zext(VT.getSizeInBits());
7477         SDLoc DL(N);
7478         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7479                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7480         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7481                                     SDLoc(N0.getOperand(0)),
7482                                     N0.getOperand(0).getValueType(), ExtLoad);
7483         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
7484         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
7485         return CombineTo(N, And); // Return N so it doesn't get rechecked!
7486       }
7487     }
7488   }
7489 
7490   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
7491   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
7492   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7493       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7494     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7495     EVT MemVT = LN0->getMemoryVT();
7496     if ((!LegalOperations && !LN0->isVolatile()) ||
7497         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
7498       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7499                                        LN0->getChain(),
7500                                        LN0->getBasePtr(), MemVT,
7501                                        LN0->getMemOperand());
7502       CombineTo(N, ExtLoad);
7503       CombineTo(N0.getNode(),
7504                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
7505                             ExtLoad),
7506                 ExtLoad.getValue(1));
7507       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7508     }
7509   }
7510 
7511   if (N0.getOpcode() == ISD::SETCC) {
7512     // Only do this before legalize for now.
7513     if (!LegalOperations && VT.isVector() &&
7514         N0.getValueType().getVectorElementType() == MVT::i1) {
7515       EVT N00VT = N0.getOperand(0).getValueType();
7516       if (getSetCCResultType(N00VT) == N0.getValueType())
7517         return SDValue();
7518 
7519       // We know that the # elements of the results is the same as the #
7520       // elements of the compare (and the # elements of the compare result for
7521       // that matter). Check to see that they are the same size. If so, we know
7522       // that the element size of the sext'd result matches the element size of
7523       // the compare operands.
7524       SDLoc DL(N);
7525       SDValue VecOnes = DAG.getConstant(1, DL, VT);
7526       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
7527         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
7528         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
7529                                      N0.getOperand(1), N0.getOperand(2));
7530         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
7531       }
7532 
7533       // If the desired elements are smaller or larger than the source
7534       // elements we can use a matching integer vector type and then
7535       // truncate/sign extend.
7536       EVT MatchingElementType = EVT::getIntegerVT(
7537           *DAG.getContext(), N00VT.getScalarSizeInBits());
7538       EVT MatchingVectorType = EVT::getVectorVT(
7539           *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
7540       SDValue VsetCC =
7541           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
7542                       N0.getOperand(1), N0.getOperand(2));
7543       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
7544                          VecOnes);
7545     }
7546 
7547     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7548     SDLoc DL(N);
7549     if (SDValue SCC = SimplifySelectCC(
7550             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7551             DAG.getConstant(0, DL, VT),
7552             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7553       return SCC;
7554   }
7555 
7556   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
7557   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
7558       isa<ConstantSDNode>(N0.getOperand(1)) &&
7559       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
7560       N0.hasOneUse()) {
7561     SDValue ShAmt = N0.getOperand(1);
7562     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
7563     if (N0.getOpcode() == ISD::SHL) {
7564       SDValue InnerZExt = N0.getOperand(0);
7565       // If the original shl may be shifting out bits, do not perform this
7566       // transformation.
7567       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
7568         InnerZExt.getOperand(0).getValueSizeInBits();
7569       if (ShAmtVal > KnownZeroBits)
7570         return SDValue();
7571     }
7572 
7573     SDLoc DL(N);
7574 
7575     // Ensure that the shift amount is wide enough for the shifted value.
7576     if (VT.getSizeInBits() >= 256)
7577       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
7578 
7579     return DAG.getNode(N0.getOpcode(), DL, VT,
7580                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
7581                        ShAmt);
7582   }
7583 
7584   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7585     return NewVSel;
7586 
7587   return SDValue();
7588 }
7589 
// Combine an ISD::ANY_EXTEND node. Since the high bits of an any_extend are
// undefined, this visitor is free to canonicalize toward whichever extension
// kind or narrower load is most convenient for the target.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (aext c1) -> c1' (and the build_vector analogues).
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  // The inner extension already defines the low bits; keep its kind and
  // simply widen it to VT in one step.
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      // ReduceLoadWidth may have replaced the truncate in place; only
      // rewrite uses when it produced a genuinely new node.
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x)) -> x, (aext x), or (truncate x) depending on
  // the relative sizes of x and VT.
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    // Zero-extending the mask preserves the and's result bits in VT.
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, we can only profit if every use can be rewritten
    // in terms of the extended load.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Remaining (narrow) uses of the old load see a truncate of the new
      // extending load; its chain output replaces the old load's chain.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      // Re-issue the existing extending load directly at the wider VT,
      // keeping its original extension kind.
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                        N0.getOperand(1),
                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}
7730 
7731 SDValue DAGCombiner::visitAssertZext(SDNode *N) {
7732   SDValue N0 = N->getOperand(0);
7733   SDValue N1 = N->getOperand(1);
7734   EVT EVT = cast<VTSDNode>(N1)->getVT();
7735 
7736   // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
7737   if (N0.getOpcode() == ISD::AssertZext &&
7738       EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
7739     return N0;
7740 
7741   return SDValue();
7742 }
7743 
7744 /// See if the specified operand can be simplified with the knowledge that only
7745 /// the bits specified by Mask are used.  If so, return the simpler operand,
7746 /// otherwise return a null SDValue.
7747 ///
7748 /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
7749 /// simplify nodes with multiple uses more aggressively.)
7750 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
7751   switch (V.getOpcode()) {
7752   default: break;
7753   case ISD::Constant: {
7754     const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
7755     assert(CV && "Const value should be ConstSDNode.");
7756     const APInt &CVal = CV->getAPIntValue();
7757     APInt NewVal = CVal & Mask;
7758     if (NewVal != CVal)
7759       return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
7760     break;
7761   }
7762   case ISD::OR:
7763   case ISD::XOR:
7764     // If the LHS or RHS don't contribute bits to the or, drop them.
7765     if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
7766       return V.getOperand(1);
7767     if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
7768       return V.getOperand(0);
7769     break;
7770   case ISD::SRL:
7771     // Only look at single-use SRLs.
7772     if (!V.getNode()->hasOneUse())
7773       break;
7774     if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
7775       // See if we can recursively simplify the LHS.
7776       unsigned Amt = RHSC->getZExtValue();
7777 
7778       // Watch out for shift count overflow though.
7779       if (Amt >= Mask.getBitWidth()) break;
7780       APInt NewMask = Mask << Amt;
7781       if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
7782         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
7783                            SimplifyLHS, V.getOperand(1));
7784     }
7785     break;
7786   case ISD::AND: {
7787     // X & -1 -> X (ignoring bits which aren't demanded).
7788     ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
7789     if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
7790       return V.getOperand(0);
7791     break;
7792   }
7793   }
7794   return SDValue();
7795 }
7796 
/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits
/// of the narrower type, transform it to a narrower load from
/// address + N / (num of bits of new type). If the result is to be extended,
/// also fold the extension to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);
    // The shift amount must be constant so we can name the narrow type.
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  // After legalization the target must support the extending load we'd form.
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  // If the operand is itself a right shift by a constant, try to look
  // through it to the load underneath (the (srl (load x), c) form).
  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.  Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain).  Don't transform a pre-increment
  // load, for example, which produces an extra value.  Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  // Give the target a veto (e.g. when the narrow load would be slower).
  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
    return SDValue();

  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // Convert the bit offset into a byte offset and a (possibly reduced)
  // alignment for the narrower load.
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               Flags);
  AddToWorklist(NewPtr.getNode());

  // Build either a plain narrow load or an extending narrow load, reusing
  // the original memory operand flags and AA info.
  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    // Fall back to VT if the shift amount doesn't fit the shift-amount type.
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}
7972 
// Combine an ISD::SIGN_EXTEND_INREG node: sign-extend the low EVT bits of
// the operand into the full width of VT.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: this local named 'EVT' shadows the EVT type for the rest of the
  // function; it holds the type being sign-extended *from*.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned EVTBits = EVT.getScalarSizeInBits();

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
      return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
  }

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // The sextload replaces the old load for every use; its chain output
    // replaces the old load's chain.
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  return SDValue();
}
8098 
8099 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
8100   SDValue N0 = N->getOperand(0);
8101   EVT VT = N->getValueType(0);
8102 
8103   if (N0.isUndef())
8104     return DAG.getUNDEF(VT);
8105 
8106   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8107                                               LegalOperations))
8108     return SDValue(Res, 0);
8109 
8110   return SDValue();
8111 }
8112 
8113 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
8114   SDValue N0 = N->getOperand(0);
8115   EVT VT = N->getValueType(0);
8116 
8117   if (N0.isUndef())
8118     return DAG.getUNDEF(VT);
8119 
8120   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8121                                               LegalOperations))
8122     return SDValue(Res, 0);
8123 
8124   return SDValue();
8125 }
8126 
/// Combine an ISD::TRUNCATE node: constant-fold, collapse truncates of
/// extends/truncates, narrow loads, shifts, selects and carry-ops, and apply
/// several vector-specific simplifications. Returns the replacement value, or
/// an empty SDValue if no combine applies.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, than we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {

    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Reinterpret the vector as SizeRatio-times more elements of the
    // truncated type; total bit width must be unchanged.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      // Select the sub-element holding the low (kept) bits of the original
      // element, which depends on endianness.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t Amt = CAmt->getZExtValue();
      unsigned Size = VT.getScalarSizeInBits();

      // Only legal if the shift amount fits in the narrow type; otherwise
      // the shift would be out of range after truncation.
      if (Amt < Size) {
        SDLoc SL(N);
        EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
        return DAG.getNode(ISD::SHL, SL, VT, Trunc,
                           DAG.getConstant(Amt, SL, AmtVT));
      }
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {

    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset-th build_vector operand; those are the
      // low parts the truncate preserves.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    if (SDValue Shorter =
            GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
                                                     VT.getSizeInBits())))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Redirect the old load's chain users to the narrower load's chain.
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      // Record the truncated type for this operand so matching undefs can be
      // rebuilt below.
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT SrcVT = VecSrc.getValueType();
    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
      SDLoc SL(N);

      // NOTE(review): extracting element 0 assumes the truncate keeps the
      // first vector element; presumably only valid for little-endian
      // layouts or guarded by the callers — confirm.
      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
                         VecSrc, DAG.getConstant(0, SL, IdxVT));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
8376 
8377 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
8378   SDValue Elt = N->getOperand(i);
8379   if (Elt.getOpcode() != ISD::MERGE_VALUES)
8380     return Elt.getNode();
8381   return Elt.getOperand(Elt.getResNo()).getNode();
8382 }
8383 
/// build_pair (load, load) -> load
/// if load locations are consecutive.
///
/// \param N  a BUILD_PAIR node whose two operands may be loads.
/// \param VT the type of the single wide load to create.
/// \returns the combined load, or an empty SDValue if the fold doesn't apply.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  // Both operands must be plain (non-extending) single-use loads from the
  // same address space.
  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
  // LD2 must immediately follow LD1 in memory (exactly LD1Bytes apart), and
  // per the helper's contract neither load may be volatile.
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    // Only combine when the wide load's ABI alignment requirement is already
    // satisfied by the first load's alignment.
    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Align);
  }

  return SDValue();
}
8410 
8411 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
8412   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
8413   // and Lo parts; on big-endian machines it doesn't.
8414   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
8415 }
8416 
8417 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
8418                                     const TargetLowering &TLI) {
8419   // If this is not a bitcast to an FP type or if the target doesn't have
8420   // IEEE754-compliant FP logic, we're done.
8421   EVT VT = N->getValueType(0);
8422   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
8423     return SDValue();
8424 
8425   // TODO: Use splat values for the constant-checking below and remove this
8426   // restriction.
8427   SDValue N0 = N->getOperand(0);
8428   EVT SourceVT = N0.getValueType();
8429   if (SourceVT.isVector())
8430     return SDValue();
8431 
8432   unsigned FPOpcode;
8433   APInt SignMask;
8434   switch (N0.getOpcode()) {
8435   case ISD::AND:
8436     FPOpcode = ISD::FABS;
8437     SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
8438     break;
8439   case ISD::XOR:
8440     FPOpcode = ISD::FNEG;
8441     SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
8442     break;
8443   // TODO: ISD::OR --> ISD::FNABS?
8444   default:
8445     return SDValue();
8446   }
8447 
8448   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
8449   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
8450   SDValue LogicOp0 = N0.getOperand(0);
8451   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8452   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
8453       LogicOp0.getOpcode() == ISD::BITCAST &&
8454       LogicOp0->getOperand(0).getValueType() == VT)
8455     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
8456 
8457   return SDValue();
8458 }
8459 
/// Combine an ISD::BITCAST node: constant-fold, collapse bitcast chains, move
/// bitcasts across loads, expand bitcast-of-fneg/fabs/fcopysign into integer
/// sign-bit logic (with ppc_fp128 special handling), and clean up bitcasted
/// shuffles. Returns the replacement value or an empty SDValue.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> conversion and that the resulting operation will
    // be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT)))
      return DAG.getBitcast(VT, N0);
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned OrigAlign = LN0->getAlignment();

    // Only rewrite the load if the target says an access of the new type at
    // the original alignment is both allowed and fast.
    bool Fast = false;
    if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
        Fast) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), OrigAlign,
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Redirect chain users of the old load to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    // ppc_fp128 is a pair of doubles; flip the sign bit of each 64-bit half
    // via a BUILD_PAIR of the per-half flip bits.
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      // ppc_fp128: flip each half's sign bit only where cst and x disagree.
      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      // Generic case: take the sign bit from x and the rest from cst.
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    // Returning from the whole function here is fine: this is the last
    // combine attempted for BITCAST.
    if (!(SV0 && SV1))
      return SDValue();

    // Widen the shuffle mask: each source element becomes MaskScale
    // consecutive elements of the new type.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      // Try the commuted form before giving up.
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}
8707 
8708 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
8709   EVT VT = N->getValueType(0);
8710   return CombineConsecutiveLoads(N, VT);
8711 }
8712 
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
/// Produces an equivalent build_vector (or scalar_to_vector) whose elements
/// reinterpret the same bits as DstEltVT, handling FP<->int conversion as
/// well as growing (merging) and shrinking (splitting) of elements.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                         DAG.getBitcast(DstEltVT, BV->getOperand(0)));

    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      // Pack NumInputsPerOutput source elements into one destination element.
      // On little-endian targets the first source element holds the low bits,
      // so operands are consumed in reverse order while shifting left.
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // An output element is undef only if every contributing input was undef.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Emit the pieces from lowest bits to highest.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal.lshrInPlace(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}
8834 
8835 static bool isContractable(SDNode *N) {
8836   SDNodeFlags F = N->getFlags();
8837   return F.hasAllowContract() || F.hasUnsafeAlgebra();
8838 }
8839 
/// Try to perform FMA combining on a given FADD node.
///
/// Rewrites (fadd (fmul x, y), z) and related patterns (commuted operands,
/// FP_EXTEND-wrapped multiplies, and — under aggressive fusion — nested
/// FMA/FMAD chains) into the target's preferred fused multiply-add opcode.
/// Returns the fused node, or an empty SDValue when no pattern matches or
/// fusion is not permitted by the target/flags.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Fusion is allowed for every node when -fp-contract=fast or unsafe math
  // is in effect, or when the target exposes FMAD; otherwise each node must
  // individually carry a contraction-permitting flag (see isContractable).
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              Options.UnsafeFPMath || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Some targets prefer to form FMAs in the machine combiner; stay out of
  // their way here.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool LookThroughFPExt = TLI.isFPExtFree(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0);
  }

  // Look through FP_EXTEND nodes to do more combining.
  if (LookThroughFPExt) {
    // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (isContractableFMUL(N00))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(1)), N1);
    }

    // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (isContractableFMUL(N10))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(1)), N0);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1));
    }

    // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0));
    }

    if (LookThroughFPExt) {
      // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
      //   -> (fma x, y, (fma (fpext u), (fpext v), z))
      auto FoldFAddFMAFPExtFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == PreferredFusedOpcode) {
        SDValue N02 = N0.getOperand(2);
        if (N02.getOpcode() == ISD::FP_EXTEND) {
          SDValue N020 = N02.getOperand(0);
          if (isContractableFMUL(N020))
            return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                        N020.getOperand(0), N020.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
      //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      auto FoldFAddFPExtFMAFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == PreferredFusedOpcode) {
          SDValue N002 = N00.getOperand(2);
          if (isContractableFMUL(N002))
            return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                        N002.getOperand(0), N002.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
      //   -> (fma y, z, (fma (fpext u), (fpext v), x))
      if (N1.getOpcode() == PreferredFusedOpcode) {
        SDValue N12 = N1.getOperand(2);
        if (N12.getOpcode() == ISD::FP_EXTEND) {
          SDValue N120 = N12.getOperand(0);
          if (isContractableFMUL(N120))
            return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                        N120.getOperand(0), N120.getOperand(1),
                                        N0);
        }
      }

      // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
      //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == PreferredFusedOpcode) {
          SDValue N102 = N10.getOperand(2);
          if (isContractableFMUL(N102))
            return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                        N102.getOperand(0), N102.getOperand(1),
                                        N0);
        }
      }
    }
  }

  // No fusion opportunity found.
  return SDValue();
}
9042 
/// Try to perform FMA combining on a given FSUB node.
///
/// Rewrites (fsub (fmul x, y), z) and related patterns (commuted operands,
/// FNEG- and FP_EXTEND-wrapped multiplies, and — under aggressive fusion —
/// nested FMA/FMAD chains) into the target's preferred fused multiply-add
/// opcode, negating operands as required to preserve the subtraction.
/// Returns the fused node, or an empty SDValue when no pattern matches or
/// fusion is not permitted by the target/flags.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Fusion is allowed for every node when -fp-contract=fast or unsafe math
  // is in effect, or when the target exposes FMAD; otherwise each node must
  // individually carry a contraction-permitting flag (see isContractable).
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              Options.UnsafeFPMath || HasFMAD);
  // If the subtraction is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Some targets prefer to form FMAs in the machine combiner; stay out of
  // their way here.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool LookThroughFPExt = TLI.isFPExtFree(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1),
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT,
                                   N1.getOperand(0)),
                       N1.getOperand(1), N0);

  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
  }

  // Look through FP_EXTEND nodes to do more combining.
  if (LookThroughFPExt) {
    // fold (fsub (fpext (fmul x, y)), z)
    //   -> (fma (fpext x), (fpext y), (fneg z))
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (isContractableFMUL(N00))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(1)),
                           DAG.getNode(ISD::FNEG, SL, VT, N1));
    }

    // fold (fsub x, (fpext (fmul y, z)))
    //   -> (fma (fneg (fpext y)), (fpext z), x)
    // Note: Commutes FSUB operands.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (isContractableFMUL(N10))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N10.getOperand(0))),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(1)),
                           N0);
    }

    // fold (fsub (fpext (fneg (fmul x, y))), z)
    //   -> (fneg (fma (fpext x), (fpext y), z))
    // Note: This could be removed with appropriate canonicalization of the
    // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However,
    // the orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math
    // prevent from implementing the canonicalization in visitFSUB.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FNEG) {
        SDValue N000 = N00.getOperand(0);
        if (isContractableFMUL(N000)) {
          return DAG.getNode(ISD::FNEG, SL, VT,
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(1)),
                                         N1));
        }
      }
    }

    // fold (fsub (fneg (fpext (fmul x, y))), z)
    //   -> (fneg (fma (fpext x), (fpext y), z))
    // Note: This could be removed with appropriate canonicalization of the
    // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However,
    // the orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math
    // prevent from implementing the canonicalization in visitFSUB.
    if (N0.getOpcode() == ISD::FNEG) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FP_EXTEND) {
        SDValue N000 = N00.getOperand(0);
        if (isContractableFMUL(N000)) {
          return DAG.getNode(ISD::FNEG, SL, VT,
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(1)),
                                         N1));
        }
      }
    }

  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
        N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT,
                                                 N1)));
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    // NOTE(review): unlike the fold above, this one imposes no one-use
    // restrictions on N1 or its inner FMUL — presumably intentional, but
    // worth confirming against the N0 case.
    if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N1.getOperand(2))) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),

                                     N21, N0));
    }

    if (LookThroughFPExt) {
      // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
      //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
      if (N0.getOpcode() == PreferredFusedOpcode) {
        SDValue N02 = N0.getOperand(2);
        if (N02.getOpcode() == ISD::FP_EXTEND) {
          SDValue N020 = N02.getOperand(0);
          if (isContractableFMUL(N020))
            return DAG.getNode(PreferredFusedOpcode, SL, VT,
                               N0.getOperand(0), N0.getOperand(1),
                               DAG.getNode(PreferredFusedOpcode, SL, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N020.getOperand(0)),
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N020.getOperand(1)),
                                           DAG.getNode(ISD::FNEG, SL, VT,
                                                       N1)));
        }
      }

      // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
      //   -> (fma (fpext x), (fpext y),
      //           (fma (fpext u), (fpext v), (fneg z)))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == PreferredFusedOpcode) {
          SDValue N002 = N00.getOperand(2);
          if (isContractableFMUL(N002))
            return DAG.getNode(PreferredFusedOpcode, SL, VT,
                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                           N00.getOperand(0)),
                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                           N00.getOperand(1)),
                               DAG.getNode(PreferredFusedOpcode, SL, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N002.getOperand(0)),
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N002.getOperand(1)),
                                           DAG.getNode(ISD::FNEG, SL, VT,
                                                       N1)));
        }
      }

      // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
      //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
      if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N1.getOperand(2).getOperand(0);
        if (isContractableFMUL(N120)) {
          SDValue N1200 = N120.getOperand(0);
          SDValue N1201 = N120.getOperand(1);
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
                             N1.getOperand(1),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                             DAG.getNode(ISD::FP_EXTEND, SL,
                                                         VT, N1200)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N1201),
                                         N0));
        }
      }

      // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
      //   -> (fma (fneg (fpext y)), (fpext z),
      //           (fma (fneg (fpext u)), (fpext v), x))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N1.getOpcode() == ISD::FP_EXTEND &&
        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
        SDValue N100 = N1.getOperand(0).getOperand(0);
        SDValue N101 = N1.getOperand(0).getOperand(1);
        SDValue N102 = N1.getOperand(0).getOperand(2);
        if (isContractableFMUL(N102)) {
          SDValue N1020 = N102.getOperand(0);
          SDValue N1021 = N102.getOperand(1);
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N100)),
                             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                             DAG.getNode(ISD::FP_EXTEND, SL,
                                                         VT, N1020)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N1021),
                                         N0));
        }
      }
    }
  }

  // No fusion opportunity found.
  return SDValue();
}
9323 
/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
///
/// Concretely, folds (fmul (fadd x, +/-1.0), y) and (fmul (fsub ..., ...), y)
/// — in either operand order — into a single FMA/FMAD node, negating the
/// addend where the sign of the constant requires it. Returns the fused node
/// or an empty SDValue if no pattern applies.
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  // Unlike the FADD/FSUB combines, this also requires fast contraction or
  // unsafe math, since the fold changes the expression's rounding behavior.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
  // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
  // Only the FADD's second operand is tested for the +/-1.0 constant.
  auto FuseFADD = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
      if (XC1 && XC1->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
      if (XC1 && XC1->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y));
    }
    return SDValue();
  };

  // Try both operand orders, since FMUL commutes.
  if (SDValue FMA = FuseFADD(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0))
    return FMA;

  // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
  // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
  // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
  // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
  // The FSUB is not commutative, so the constant is checked on both sides.
  auto FuseFSUB = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
      if (XC0 && XC0->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                           Y);
      if (XC0 && XC0->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y));

      auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
      if (XC1 && XC1->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y));
      if (XC1 && XC1->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
    }
    return SDValue();
  };

  // Try both operand orders, since FMUL commutes.
  if (SDValue FMA = FuseFSUB(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0))
    return FMA;

  return SDValue();
}
9413 
9414 static bool isFMulNegTwo(SDValue &N) {
9415   if (N.getOpcode() != ISD::FMUL)
9416     return false;
9417   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
9418     return CFP->isExactlyValue(-2.0);
9419   return false;
9420 }
9421 
9422 SDValue DAGCombiner::visitFADD(SDNode *N) {
9423   SDValue N0 = N->getOperand(0);
9424   SDValue N1 = N->getOperand(1);
9425   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
9426   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
9427   EVT VT = N->getValueType(0);
9428   SDLoc DL(N);
9429   const TargetOptions &Options = DAG.getTarget().Options;
9430   const SDNodeFlags Flags = N->getFlags();
9431 
9432   // fold vector ops
9433   if (VT.isVector())
9434     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9435       return FoldedVOp;
9436 
9437   // fold (fadd c1, c2) -> c1 + c2
9438   if (N0CFP && N1CFP)
9439     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
9440 
9441   // canonicalize constant to RHS
9442   if (N0CFP && !N1CFP)
9443     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
9444 
9445   if (SDValue NewSel = foldBinOpIntoSelect(N))
9446     return NewSel;
9447 
9448   // fold (fadd A, (fneg B)) -> (fsub A, B)
9449   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9450       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
9451     return DAG.getNode(ISD::FSUB, DL, VT, N0,
9452                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9453 
9454   // fold (fadd (fneg A), B) -> (fsub B, A)
9455   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9456       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
9457     return DAG.getNode(ISD::FSUB, DL, VT, N1,
9458                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
9459 
9460   // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
9461   // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
9462   if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
9463       (isFMulNegTwo(N1) && N1.hasOneUse())) {
9464     bool N1IsFMul = isFMulNegTwo(N1);
9465     SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
9466     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
9467     return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
9468   }
9469 
9470   // FIXME: Auto-upgrade the target/function-level option.
9471   if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
9472     // fold (fadd A, 0) -> A
9473     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
9474       if (N1C->isZero())
9475         return N0;
9476   }
9477 
9478   // If 'unsafe math' is enabled, fold lots of things.
9479   if (Options.UnsafeFPMath) {
9480     // No FP constant should be created after legalization as Instruction
9481     // Selection pass has a hard time dealing with FP constants.
9482     bool AllowNewConst = (Level < AfterLegalizeDAG);
9483 
9484     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
9485     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
9486         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
9487       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
9488                          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
9489                                      Flags),
9490                          Flags);
9491 
9492     // If allowed, fold (fadd (fneg x), x) -> 0.0
9493     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
9494       return DAG.getConstantFP(0.0, DL, VT);
9495 
9496     // If allowed, fold (fadd x, (fneg x)) -> 0.0
9497     if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
9498       return DAG.getConstantFP(0.0, DL, VT);
9499 
9500     // We can fold chains of FADD's of the same value into multiplications.
9501     // This transform is not safe in general because we are reducing the number
9502     // of rounding steps.
9503     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
9504       if (N0.getOpcode() == ISD::FMUL) {
9505         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9506         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
9507 
9508         // (fadd (fmul x, c), x) -> (fmul x, c+1)
9509         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
9510           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9511                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9512           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
9513         }
9514 
9515         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
9516         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
9517             N1.getOperand(0) == N1.getOperand(1) &&
9518             N0.getOperand(0) == N1.getOperand(0)) {
9519           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9520                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9521           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
9522         }
9523       }
9524 
9525       if (N1.getOpcode() == ISD::FMUL) {
9526         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9527         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
9528 
9529         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
9530         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
9531           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9532                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9533           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
9534         }
9535 
9536         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
9537         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
9538             N0.getOperand(0) == N0.getOperand(1) &&
9539             N1.getOperand(0) == N0.getOperand(0)) {
9540           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9541                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9542           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
9543         }
9544       }
9545 
9546       if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
9547         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9548         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
9549         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
9550             (N0.getOperand(0) == N1)) {
9551           return DAG.getNode(ISD::FMUL, DL, VT,
9552                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
9553         }
9554       }
9555 
9556       if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
9557         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9558         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
9559         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
9560             N1.getOperand(0) == N0) {
9561           return DAG.getNode(ISD::FMUL, DL, VT,
9562                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
9563         }
9564       }
9565 
9566       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
9567       if (AllowNewConst &&
9568           N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
9569           N0.getOperand(0) == N0.getOperand(1) &&
9570           N1.getOperand(0) == N1.getOperand(1) &&
9571           N0.getOperand(0) == N1.getOperand(0)) {
9572         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
9573                            DAG.getConstantFP(4.0, DL, VT), Flags);
9574       }
9575     }
9576   } // enable-unsafe-fp-math
9577 
9578   // FADD -> FMA combines:
9579   if (SDValue Fused = visitFADDForFMACombine(N)) {
9580     AddToWorklist(Fused.getNode());
9581     return Fused;
9582   }
9583   return SDValue();
9584 }
9585 
/// Try to simplify an FSUB node: constant folding, fneg-based rewrites, and
/// (under relaxed FP semantics) identity/cancellation folds; finally try to
/// form an FMA.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Scalar FP constant (or splat constant) for each operand, if any.
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  // Re-emitting the node with two constant operands lets getNode fold it.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // FIXME: Auto-upgrade the target/function-level option.
  if (Options.NoSignedZerosFPMath  || N->getFlags().hasNoSignedZeros()) {
    // (fsub 0, B) -> -B
    // Requires no-signed-zeros: 0.0 - (+0.0) is +0.0, while fneg(+0.0)
    // is -0.0.
    if (N0CFP && N0CFP->isZero()) {
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
    }
  }

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // (fsub A, 0) -> A
    if (N1CFP && N1CFP->isZero())
      return N0;

    // (fsub x, x) -> 0.0
    // Unsafe: wrong for x = NaN or Inf.
    if (N0 == N1)
      return DAG.getConstantFP(0.0f, DL, VT);

    // (fsub x, (fadd x, y)) -> (fneg y)
    // (fsub x, (fadd y, x)) -> (fneg y)
    if (N1.getOpcode() == ISD::FADD) {
      SDValue N10 = N1->getOperand(0);
      SDValue N11 = N1->getOperand(1);

      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N11, DAG, LegalOperations);

      if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N10, DAG, LegalOperations);
    }
  }

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
9656 
/// Try to simplify an FMUL node: constant folding, canonicalization, identity
/// folds, and (under unsafe math) reassociation with inner multiplies;
/// finally try the distributive FMA combine.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Scalar FP constant (or splat constant) for each operand, if any.
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  // Re-emitting the node with two constant operands lets getNode fold it.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (Options.UnsafeFPMath) {
    // fold (fmul A, 0) -> 0
    // Unsafe: wrong for A = NaN/Inf and ignores the sign of zero.
    if (N1CFP && N1CFP->isZero())
      return N1;

    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);

      // Check 1: Make sure that the first operand of the inner multiply is NOT
      // a constant. Otherwise, we may induce infinite looping.
      if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
        // Check 2: Make sure that the second operand of the inner multiply and
        // the second operand of the outer multiply are constants.
        if ((N1CFP && isConstOrConstSplatFP(N01)) ||
            (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
        }
      }
    }

    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
    // during an early run of DAGCombiner can prevent folding with fmuls
    // inserted during lowering.
    if (N0.getOpcode() == ISD::FADD &&
        (N0.getOperand(0) == N0.getOperand(1)) &&
        N0.hasOneUse()) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
9762 
/// Try to simplify an FMA node: constant folding, operand canonicalization,
/// and strength reduction to FADD/FMUL when a multiplier is 0 or +/-1 or
/// when constants can be combined (under unsafe math).
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // Constant fold FMA.
  // Re-emitting the node with all-constant operands lets getNode fold it.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  if (Options.UnsafeFPMath) {
    // (fma 0, y, z) -> z and (fma x, 0, z) -> z.
    // Unsafe: drops the multiply, losing NaN/Inf propagation from it.
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // (fma 1, y, z) -> (fadd y, z); (fma x, 1, z) -> (fadd x, z).
  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  // TODO: FMA nodes should have flags that propagate to the created nodes.
  // For now, create a Flags object for use with all unsafe math transforms.
  SDNodeFlags Flags;
  Flags.setUnsafeAlgebra(true);

  if (Options.UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }
  }

  if (Options.UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  return SDValue();
}
9860 
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // This transform changes rounding behavior (a shared reciprocal replaces
  // per-division rounding), so it requires either global unsafe math or the
  // allow-reciprocal fast-math flag on this node.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  if (!UnsafeMath && !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();
  // use_size() is an upper bound on eligible FDIV users (some uses may not
  // be FDIVs or may use N1 as the dividend), so this is just a cheap filter.
  if (!MinUses || N1->use_size() < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if (Users.size() < MinUses)
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  // N itself is one of the users rewritten above.
  return SDValue(N, 0);  // N was replaced.
}
9924 
/// Try to simplify an FDIV node: constant folding and, under unsafe math,
/// multiply-by-reciprocal and rsqrt/reciprocal estimate formation; finally
/// attempt to combine repeated divisions by the same divisor.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  // Re-emitting the node with two constant operands lets getNode fold it.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // x / sqrt(y) -> x * rsqrt(y)
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpext(sqrt(y)) -> x * fpext(rsqrt(y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpround(sqrt(y)) -> x * fpround(rsqrt(y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // Combine repeated divisions by this divisor into one reciprocal and a
  // series of multiplies (see combineRepeatedFPDivisors).
  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
    return CombineRepeatedDivisors;

  return SDValue();
}
10037 
10038 SDValue DAGCombiner::visitFREM(SDNode *N) {
10039   SDValue N0 = N->getOperand(0);
10040   SDValue N1 = N->getOperand(1);
10041   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10042   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10043   EVT VT = N->getValueType(0);
10044 
10045   // fold (frem c1, c2) -> fmod(c1,c2)
10046   if (N0CFP && N1CFP)
10047     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
10048 
10049   if (SDValue NewSel = foldBinOpIntoSelect(N))
10050     return NewSel;
10051 
10052   return SDValue();
10053 }
10054 
10055 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
10056   if (!DAG.getTarget().Options.UnsafeFPMath)
10057     return SDValue();
10058 
10059   SDValue N0 = N->getOperand(0);
10060   if (TLI.isFsqrtCheap(N0, DAG))
10061     return SDValue();
10062 
10063   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
10064   // For now, create a Flags object for use with all unsafe math transforms.
10065   SDNodeFlags Flags;
10066   Flags.setUnsafeAlgebra(true);
10067   return buildSqrtEstimate(N0, Flags);
10068 }
10069 
10070 /// copysign(x, fp_extend(y)) -> copysign(x, y)
10071 /// copysign(x, fp_round(y)) -> copysign(x, y)
10072 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
10073   SDValue N1 = N->getOperand(1);
10074   if ((N1.getOpcode() == ISD::FP_EXTEND ||
10075        N1.getOpcode() == ISD::FP_ROUND)) {
10076     // Do not optimize out type conversion of f128 type yet.
10077     // For some targets like x86_64, configuration is changed to keep one f128
10078     // value in one SSE register, but instruction selection cannot handle
10079     // FCOPYSIGN on SSE registers yet.
10080     EVT N1VT = N1->getValueType(0);
10081     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
10082     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
10083   }
10084   return false;
10085 }
10086 
10087 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
10088   SDValue N0 = N->getOperand(0);
10089   SDValue N1 = N->getOperand(1);
10090   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10091   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10092   EVT VT = N->getValueType(0);
10093 
10094   if (N0CFP && N1CFP) // Constant fold
10095     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
10096 
10097   if (N1CFP) {
10098     const APFloat &V = N1CFP->getValueAPF();
10099     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
10100     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
10101     if (!V.isNegative()) {
10102       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
10103         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10104     } else {
10105       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10106         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
10107                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
10108     }
10109   }
10110 
10111   // copysign(fabs(x), y) -> copysign(x, y)
10112   // copysign(fneg(x), y) -> copysign(x, y)
10113   // copysign(copysign(x,z), y) -> copysign(x, y)
10114   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
10115       N0.getOpcode() == ISD::FCOPYSIGN)
10116     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
10117 
10118   // copysign(x, abs(y)) -> abs(x)
10119   if (N1.getOpcode() == ISD::FABS)
10120     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10121 
10122   // copysign(x, copysign(y,z)) -> copysign(x, z)
10123   if (N1.getOpcode() == ISD::FCOPYSIGN)
10124     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
10125 
10126   // copysign(x, fp_extend(y)) -> copysign(x, y)
10127   // copysign(x, fp_round(y)) -> copysign(x, y)
10128   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
10129     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
10130 
10131   return SDValue();
10132 }
10133 
/// Try to simplify a SINT_TO_FP node: constant folding, conversion to
/// UINT_TO_FP when the sign bit is known zero, and SELECT_CC formation for
/// boolean-like inputs.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  // Re-emitting the node with a constant operand lets getNode fold it...
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
    // The arms are -1.0/0.0 because an i1 'true' sign-extends to -1.
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0, cc)
    // The zext makes the input 1 or 0, hence the 1.0/0.0 arms here.
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  return SDValue();
}
10187 
10188 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
10189   SDValue N0 = N->getOperand(0);
10190   EVT VT = N->getValueType(0);
10191   EVT OpVT = N0.getValueType();
10192 
10193   // fold (uint_to_fp c1) -> c1fp
10194   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10195       // ...but only if the target supports immediate floating-point values
10196       (!LegalOperations ||
10197        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
10198     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10199 
10200   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
10201   // but SINT_TO_FP is legal on this target, try to convert.
10202   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
10203       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
10204     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
10205     if (DAG.SignBitIsZero(N0))
10206       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10207   }
10208 
10209   // The next optimizations are desirable only if SELECT_CC can be lowered.
10210   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10211     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10212 
10213     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
10214         (!LegalOperations ||
10215          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10216       SDLoc DL(N);
10217       SDValue Ops[] =
10218         { N0.getOperand(0), N0.getOperand(1),
10219           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10220           N0.getOperand(2) };
10221       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10222     }
10223   }
10224 
10225   return SDValue();
10226 }
10227 
10228 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
10229 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
10230   SDValue N0 = N->getOperand(0);
10231   EVT VT = N->getValueType(0);
10232 
10233   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
10234     return SDValue();
10235 
10236   SDValue Src = N0.getOperand(0);
10237   EVT SrcVT = Src.getValueType();
10238   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
10239   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
10240 
10241   // We can safely assume the conversion won't overflow the output range,
10242   // because (for example) (uint8_t)18293.f is undefined behavior.
10243 
10244   // Since we can assume the conversion won't overflow, our decision as to
10245   // whether the input will fit in the float should depend on the minimum
10246   // of the input range and output range.
10247 
10248   // This means this is also safe for a signed input and unsigned output, since
10249   // a negative input would lead to undefined behavior.
10250   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
10251   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
10252   unsigned ActualSize = std::min(InputSize, OutputSize);
10253   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
10254 
10255   // We can only fold away the float conversion if the input range can be
10256   // represented exactly in the float range.
10257   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
10258     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
10259       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
10260                                                        : ISD::ZERO_EXTEND;
10261       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
10262     }
10263     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
10264       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
10265     return DAG.getBitcast(VT, Src);
10266   }
10267   return SDValue();
10268 }
10269 
10270 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
10271   SDValue N0 = N->getOperand(0);
10272   EVT VT = N->getValueType(0);
10273 
10274   // fold (fp_to_sint c1fp) -> c1
10275   if (isConstantFPBuildVectorOrConstantFP(N0))
10276     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
10277 
10278   return FoldIntToFPToInt(N, DAG);
10279 }
10280 
10281 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
10282   SDValue N0 = N->getOperand(0);
10283   EVT VT = N->getValueType(0);
10284 
10285   // fold (fp_to_uint c1fp) -> c1
10286   if (isConstantFPBuildVectorOrConstantFP(N0))
10287     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
10288 
10289   return FoldIntToFPToInt(N, DAG);
10290 }
10291 
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  // Operand 1 is the "truncation is value-preserving" flag (1 means no bits
  // of precision are lost).
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  // Only when x already has the result type: extending then rounding back is
  // a no-op.
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      // The combined round is value-preserving only if both inputs were.
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // One-use only, so the original copysign becomes dead after the rewrite.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  // Try to push the round through a vselect-of-setcc (shared helper with
  // fp_extend).
  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
10347 
10348 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
10349   SDValue N0 = N->getOperand(0);
10350   EVT VT = N->getValueType(0);
10351   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10352   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10353 
10354   // fold (fp_round_inreg c1fp) -> c1fp
10355   if (N0CFP && isTypeLegal(EVT)) {
10356     SDLoc DL(N);
10357     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
10358     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
10359   }
10360 
10361   return SDValue();
10362 }
10363 
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  // FP16_TO_FP can produce the wider result type directly when it is legal
  // for VT.
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    // Same type as X: the round/extend pair is a no-op.
    if (In.getValueType() == VT) return In;
    // Result still narrower than X: a single value-preserving round suffices.
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    // Result wider than X: extend X directly.
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Re-express the original load's value as a value-preserving round of
    // the extending load (and take the extload's chain), so any other uses
    // of the old load stay valid.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Try to push the extend through a vselect-of-setcc (shared helper with
  // fp_round).
  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
10416 
10417 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
10418   SDValue N0 = N->getOperand(0);
10419   EVT VT = N->getValueType(0);
10420 
10421   // fold (fceil c1) -> fceil(c1)
10422   if (isConstantFPBuildVectorOrConstantFP(N0))
10423     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
10424 
10425   return SDValue();
10426 }
10427 
10428 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
10429   SDValue N0 = N->getOperand(0);
10430   EVT VT = N->getValueType(0);
10431 
10432   // fold (ftrunc c1) -> ftrunc(c1)
10433   if (isConstantFPBuildVectorOrConstantFP(N0))
10434     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
10435 
10436   return SDValue();
10437 }
10438 
10439 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
10440   SDValue N0 = N->getOperand(0);
10441   EVT VT = N->getValueType(0);
10442 
10443   // fold (ffloor c1) -> ffloor(c1)
10444   if (isConstantFPBuildVectorOrConstantFP(N0))
10445     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
10446 
10447   return SDValue();
10448 }
10449 
// FIXME: FNEG and FABS have a lot in common; refactor.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the negation of N0 is free (e.g. a sign can be flipped inside the
  // expression), use the pre-negated expression instead of an explicit FNEG.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    // Only a scalar integer payload is handled here; vector-of-int sources
    // are left alone.
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      // Flip the sign bit(s) in the integer domain, then cast back to FP.
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only after DAG legalization, and only if the negated constant is a
      // legal FP immediate (or ConstantFP is legal at this type). The FNEG of
      // the constant operand is emitted and expected to fold to -c.
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}
10508 
10509 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
10510   SDValue N0 = N->getOperand(0);
10511   SDValue N1 = N->getOperand(1);
10512   EVT VT = N->getValueType(0);
10513   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10514   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10515 
10516   if (N0CFP && N1CFP) {
10517     const APFloat &C0 = N0CFP->getValueAPF();
10518     const APFloat &C1 = N1CFP->getValueAPF();
10519     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
10520   }
10521 
10522   // Canonicalize to constant on RHS.
10523   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10524      !isConstantFPBuildVectorOrConstantFP(N1))
10525     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
10526 
10527   return SDValue();
10528 }
10529 
10530 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
10531   SDValue N0 = N->getOperand(0);
10532   SDValue N1 = N->getOperand(1);
10533   EVT VT = N->getValueType(0);
10534   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10535   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10536 
10537   if (N0CFP && N1CFP) {
10538     const APFloat &C0 = N0CFP->getValueAPF();
10539     const APFloat &C1 = N1CFP->getValueAPF();
10540     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
10541   }
10542 
10543   // Canonicalize to constant on RHS.
10544   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10545      !isConstantFPBuildVectorOrConstantFP(N1))
10546     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
10547 
10548   return SDValue();
10549 }
10550 
10551 SDValue DAGCombiner::visitFABS(SDNode *N) {
10552   SDValue N0 = N->getOperand(0);
10553   EVT VT = N->getValueType(0);
10554 
10555   // fold (fabs c1) -> fabs(c1)
10556   if (isConstantFPBuildVectorOrConstantFP(N0))
10557     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10558 
10559   // fold (fabs (fabs x)) -> (fabs x)
10560   if (N0.getOpcode() == ISD::FABS)
10561     return N->getOperand(0);
10562 
10563   // fold (fabs (fneg x)) -> (fabs x)
10564   // fold (fabs (fcopysign x, y)) -> (fabs x)
10565   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
10566     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
10567 
10568   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
10569   // constant pool values.
10570   if (!TLI.isFAbsFree(VT) &&
10571       N0.getOpcode() == ISD::BITCAST &&
10572       N0.getNode()->hasOneUse()) {
10573     SDValue Int = N0.getOperand(0);
10574     EVT IntVT = Int.getValueType();
10575     if (IntVT.isInteger() && !IntVT.isVector()) {
10576       APInt SignMask;
10577       if (N0.getValueType().isVector()) {
10578         // For a vector, get a mask such as 0x7f... per scalar element
10579         // and splat it.
10580         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
10581         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10582       } else {
10583         // For a scalar, just generate 0x7f...
10584         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
10585       }
10586       SDLoc DL(N0);
10587       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
10588                         DAG.getConstant(SignMask, DL, IntVT));
10589       AddToWorklist(Int.getNode());
10590       return DAG.getBitcast(N->getValueType(0), Int);
10591     }
10592   }
10593 
10594   return SDValue();
10595 }
10596 
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);   // Branch condition.
  SDValue N2 = N->getOperand(2);   // Destination block.

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  // Look for a one-use SRL condition, possibly behind a one-use TRUNCATE.
  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
       (N1.getOperand(0).hasOneUse() &&
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
    SDNode *Trunc = nullptr;
    if (N1.getOpcode() == ISD::TRUNCATE) {
      // Look pass the truncate.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDLoc DL(N);
          // (X & (1 << C)) >> C is nonzero iff (X & (1 << C)) != 0.
          SDValue SetCC =
            DAG.getSetCC(DL,
                         getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc)
            deleteAndRecombine(Trunc);
          // Replace the uses of SRL with SETCC
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
          deleteAndRecombine(N1.getNode());
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }

    if (Trunc)
      // Restore N1 if the above transformation doesn't match.
      N1 = N->getOperand(1);
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      if (SDValue Tmp = visitXOR(TheXor)) {
        if (Tmp.getNode() != TheXor) {
          DEBUG(dbgs() << "\nReplacing.8 ";
                TheXor->dump(&DAG);
                dbgs() << "\nWith: ";
                Tmp.getNode()->dump(&DAG);
                dbgs() << '\n');
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
          deleteAndRecombine(TheXor);
          return DAG.getNode(ISD::BRCOND, SDLoc(N),
                             MVT::Other, Chain, Tmp, N2);
        }

        // visitXOR has changed XOR's operands or replaced the XOR completely,
        // bail out.
        return SDValue(N, 0);
      }
    }

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // NOTE(review): this condition looks self-contradictory -- Op0 cannot be
      // both a constant one and an ISD::XOR -- so Equal appears to always stay
      // false and the br(xor(xor(x,y),1)) form documented above never fires;
      // confirm whether isOneConstant was meant to test Op1.
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
          Op0.getOpcode() == ISD::XOR) {
        TheXor = Op0.getNode();
        Equal = true;
      }

      EVT SetCCVT = N1.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      // Rewrite the XOR condition as an explicit comparison of its operands.
      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
      deleteAndRecombine(N1.getNode());
      return DAG.getNode(ISD::BRCOND, SDLoc(N),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}
10741 
10742 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
10743 //
10744 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
10745   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
10746   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
10747 
10748   // If N is a constant we could fold this into a fallthrough or unconditional
10749   // branch. However that doesn't happen very often in normal code, because
10750   // Instcombine/SimplifyCFG should have handled the available opportunities.
10751   // If we did this folding here, it would be necessary to update the
10752   // MachineBasicBlock CFG, which is awkward.
10753 
10754   // Use SimplifySetCC to simplify SETCC's.
10755   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
10756                                CondLHS, CondRHS, CC->get(), SDLoc(N),
10757                                false);
10758   if (Simp.getNode()) AddToWorklist(Simp.getNode());
10759 
10760   // fold to a simpler setcc
10761   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
10762     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
10763                        N->getOperand(0), Simp.getOperand(2),
10764                        Simp.getOperand(0), Simp.getOperand(1),
10765                        N->getOperand(4));
10766 
10767   return SDValue();
10768 }
10769 
10770 /// Return true if 'Use' is a load or a store that uses N as its base pointer
10771 /// and that N may be folded in the load / store addressing mode.
10772 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
10773                                     SelectionDAG &DAG,
10774                                     const TargetLowering &TLI) {
10775   EVT VT;
10776   unsigned AS;
10777 
10778   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
10779     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
10780       return false;
10781     VT = LD->getMemoryVT();
10782     AS = LD->getAddressSpace();
10783   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
10784     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
10785       return false;
10786     VT = ST->getMemoryVT();
10787     AS = ST->getAddressSpace();
10788   } else
10789     return false;
10790 
10791   TargetLowering::AddrMode AM;
10792   if (N->getOpcode() == ISD::ADD) {
10793     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
10794     if (Offset)
10795       // [reg +/- imm]
10796       AM.BaseOffs = Offset->getSExtValue();
10797     else
10798       // [reg +/- reg]
10799       AM.Scale = 1;
10800   } else if (N->getOpcode() == ISD::SUB) {
10801     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
10802     if (Offset)
10803       // [reg +/- imm]
10804       AM.BaseOffs = -Offset->getSExtValue();
10805     else
10806       // [reg +/- reg]
10807       AM.Scale = 1;
10808   } else
10809     return false;
10810 
10811   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
10812                                    VT.getTypeForEVT(*DAG.getContext()), AS);
10813 }
10814 
10815 /// Try turning a load/store into a pre-indexed load/store when the base
10816 /// pointer is an add or subtract and it has other uses besides the load/store.
10817 /// After the transformation, the new indexed load/store has effectively folded
10818 /// the add/subtract in and all of its other uses are redirected to the
10819 /// new load/store.
10820 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
10821   if (Level < AfterLegalizeDAG)
10822     return false;
10823 
10824   bool isLoad = true;
10825   SDValue Ptr;
10826   EVT VT;
10827   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
10828     if (LD->isIndexed())
10829       return false;
10830     VT = LD->getMemoryVT();
10831     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
10832         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
10833       return false;
10834     Ptr = LD->getBasePtr();
10835   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
10836     if (ST->isIndexed())
10837       return false;
10838     VT = ST->getMemoryVT();
10839     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
10840         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
10841       return false;
10842     Ptr = ST->getBasePtr();
10843     isLoad = false;
10844   } else {
10845     return false;
10846   }
10847 
10848   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
10849   // out.  There is no reason to make this a preinc/predec.
10850   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
10851       Ptr.getNode()->hasOneUse())
10852     return false;
10853 
10854   // Ask the target to do addressing mode selection.
10855   SDValue BasePtr;
10856   SDValue Offset;
10857   ISD::MemIndexedMode AM = ISD::UNINDEXED;
10858   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
10859     return false;
10860 
10861   // Backends without true r+i pre-indexed forms may need to pass a
10862   // constant base with a variable offset so that constant coercion
10863   // will work with the patterns in canonical form.
10864   bool Swapped = false;
10865   if (isa<ConstantSDNode>(BasePtr)) {
10866     std::swap(BasePtr, Offset);
10867     Swapped = true;
10868   }
10869 
10870   // Don't create a indexed load / store with zero offset.
10871   if (isNullConstant(Offset))
10872     return false;
10873 
10874   // Try turning it into a pre-indexed load / store except when:
10875   // 1) The new base ptr is a frame index.
10876   // 2) If N is a store and the new base ptr is either the same as or is a
10877   //    predecessor of the value being stored.
10878   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
10879   //    that would create a cycle.
10880   // 4) All uses are load / store ops that use it as old base ptr.
10881 
10882   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
10883   // (plus the implicit offset) to a register to preinc anyway.
10884   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
10885     return false;
10886 
10887   // Check #2.
10888   if (!isLoad) {
10889     SDValue Val = cast<StoreSDNode>(N)->getValue();
10890     if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
10891       return false;
10892   }
10893 
10894   // Caches for hasPredecessorHelper.
10895   SmallPtrSet<const SDNode *, 32> Visited;
10896   SmallVector<const SDNode *, 16> Worklist;
10897   Worklist.push_back(N);
10898 
10899   // If the offset is a constant, there may be other adds of constants that
10900   // can be folded with this one. We should do this to avoid having to keep
10901   // a copy of the original base pointer.
10902   SmallVector<SDNode *, 16> OtherUses;
10903   if (isa<ConstantSDNode>(Offset))
10904     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
10905                               UE = BasePtr.getNode()->use_end();
10906          UI != UE; ++UI) {
10907       SDUse &Use = UI.getUse();
10908       // Skip the use that is Ptr and uses of other results from BasePtr's
10909       // node (important for nodes that return multiple results).
10910       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
10911         continue;
10912 
10913       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
10914         continue;
10915 
10916       if (Use.getUser()->getOpcode() != ISD::ADD &&
10917           Use.getUser()->getOpcode() != ISD::SUB) {
10918         OtherUses.clear();
10919         break;
10920       }
10921 
10922       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
10923       if (!isa<ConstantSDNode>(Op1)) {
10924         OtherUses.clear();
10925         break;
10926       }
10927 
10928       // FIXME: In some cases, we can be smarter about this.
10929       if (Op1.getValueType() != Offset.getValueType()) {
10930         OtherUses.clear();
10931         break;
10932       }
10933 
10934       OtherUses.push_back(Use.getUser());
10935     }
10936 
10937   if (Swapped)
10938     std::swap(BasePtr, Offset);
10939 
10940   // Now check for #3 and #4.
10941   bool RealUse = false;
10942 
10943   for (SDNode *Use : Ptr.getNode()->uses()) {
10944     if (Use == N)
10945       continue;
10946     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
10947       return false;
10948 
10949     // If Ptr may be folded in addressing mode of other use, then it's
10950     // not profitable to do this transformation.
10951     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
10952       RealUse = true;
10953   }
10954 
10955   if (!RealUse)
10956     return false;
10957 
10958   SDValue Result;
10959   if (isLoad)
10960     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
10961                                 BasePtr, Offset, AM);
10962   else
10963     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
10964                                  BasePtr, Offset, AM);
10965   ++PreIndexedNodes;
10966   ++NodesCombined;
10967   DEBUG(dbgs() << "\nReplacing.4 ";
10968         N->dump(&DAG);
10969         dbgs() << "\nWith: ";
10970         Result.getNode()->dump(&DAG);
10971         dbgs() << '\n');
10972   WorklistRemover DeadNodes(*this);
10973   if (isLoad) {
10974     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
10975     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
10976   } else {
10977     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
10978   }
10979 
10980   // Finally, since the node is now dead, remove it from the graph.
10981   deleteAndRecombine(N);
10982 
10983   if (Swapped)
10984     std::swap(BasePtr, Offset);
10985 
10986   // Replace other uses of BasePtr that can be updated to use Ptr
10987   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
10988     unsigned OffsetIdx = 1;
10989     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
10990       OffsetIdx = 0;
10991     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
10992            BasePtr.getNode() && "Expected BasePtr operand");
10993 
10994     // We need to replace ptr0 in the following expression:
10995     //   x0 * offset0 + y0 * ptr0 = t0
10996     // knowing that
10997     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
10998     //
10999     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
11001     //
11002     // Therefore, we have:
11003     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
11004 
11005     ConstantSDNode *CN =
11006       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
11007     int X0, X1, Y0, Y1;
11008     const APInt &Offset0 = CN->getAPIntValue();
11009     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
11010 
11011     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
11012     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
11013     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
11014     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
11015 
11016     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
11017 
11018     APInt CNV = Offset0;
11019     if (X0 < 0) CNV = -CNV;
11020     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
11021     else CNV = CNV - Offset1;
11022 
11023     SDLoc DL(OtherUses[i]);
11024 
11025     // We can now generate the new expression.
11026     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
11027     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
11028 
11029     SDValue NewUse = DAG.getNode(Opcode,
11030                                  DL,
11031                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
11032     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
11033     deleteAndRecombine(OtherUses[i]);
11034   }
11035 
11036   // Replace the uses of Ptr with uses of the updated base value.
11037   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
11038   deleteAndRecombine(Ptr.getNode());
11039 
11040   return true;
11041 }
11042 
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The add/subtract is folded into the new indexed
/// load/store, and all uses of the arithmetic node are redirected to the
/// written-back base value produced by the new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed addressing modes are only formed once the DAG is fully legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  // N must be an unindexed load/store whose memory VT the target can express
  // as a post-inc or post-dec indexed access; otherwise bail out early.
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer has no use besides N itself, there is no add/sub to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look among Ptr's users for an ADD/SUB the target can fold as the
  // post-indexed write-back of N.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          // Every user folds into an addressing mode: transforming would not
          // save anything for this candidate, so try the next one.
          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // On the indexed node the load result stays at index 0 but the chain
        // moves to index 2 (index 1 is the written-back base); for a store
        // only the chain (0 -> 1) needs rewiring.
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
11162 
11163 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
11164 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
11165   ISD::MemIndexedMode AM = LD->getAddressingMode();
11166   assert(AM != ISD::UNINDEXED);
11167   SDValue BP = LD->getOperand(1);
11168   SDValue Inc = LD->getOperand(2);
11169 
11170   // Some backends use TargetConstants for load offsets, but don't expect
11171   // TargetConstants in general ADD nodes. We can convert these constants into
11172   // regular Constants (if the constant is not opaque).
11173   assert((Inc.getOpcode() != ISD::TargetConstant ||
11174           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
11175          "Cannot split out indexing using opaque target constants");
11176   if (Inc.getOpcode() == ISD::TargetConstant) {
11177     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
11178     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
11179                           ConstInc->getValueType(0));
11180   }
11181 
11182   unsigned Opc =
11183       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
11184   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
11185 }
11186 
/// Combine entry point for LOAD nodes: delete dead loads, forward a directly
/// preceding store's value, improve alignment, re-chain past non-aliasing
/// memory ops, form pre/post-indexed loads, and slice the load if profitable.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The loaded value is dead; if the written-back pointer is also dead
      // (or we may split the indexing off), the whole load can go away.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          // Keep the pointer update alive as explicit arithmetic.
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (OptLevel != CodeGenOpt::None &&
      ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      // Same address and same type: the store's value can be forwarded.
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, PrevST->getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // getExtLoad may CSE to N itself; only combine if it is a new node.
        if (NewLoad.getNode() != N)
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
11326 
namespace {
/// \brief Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// \brief Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;
    /// Various cost.
    unsigned Loads;
    unsigned Truncates;
    unsigned CrossRegisterBanksCopies;
    unsigned ZExts;
    unsigned Shift;

    Cost(bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}

    /// \brief Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      // A slice narrower than its use needs a zero-extension, unless the
      // target provides that extension for free.
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// \brief Account for slicing gain in the current cost.
    /// Slicing provide a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };
  // The last instruction that represent the slice. This should be a
  // truncate instruction.
  SDNode *Inst;
  // The original load instruction.
  LoadSDNode *Origin;
  // The right shift amount in bits from the original load.
  unsigned Shift;
  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// \brief Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// \brief Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// \brief Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // An offset into the original load may weaken the known alignment.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// \brief Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// \brief Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets, byte 0 in memory holds the most significant
    // bits, so the byte offset counts from the other end.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// \brief Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset),
                     getAlignment(), Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// \brief Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    // The slice must feed exactly one BITCAST to qualify.
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
}
11630 
11631 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
11632 /// \p UsedBits looks like 0..0 1..1 0..0.
11633 static bool areUsedBitsDense(const APInt &UsedBits) {
11634   // If all the bits are one, this is dense!
11635   if (UsedBits.isAllOnesValue())
11636     return true;
11637 
11638   // Get rid of the unused bits on the right.
11639   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
11640   // Get rid of the unused bits on the left.
11641   if (NarrowedUsedBits.countLeadingZeros())
11642     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
11643   // Check that the chunk of bits is completely used.
11644   return NarrowedUsedBits.isAllOnesValue();
11645 }
11646 
11647 /// \brief Check whether or not \p First and \p Second are next to each other
11648 /// in memory. This means that there is no hole between the bits loaded
11649 /// by \p First and the bits loaded by \p Second.
11650 static bool areSlicesNextToEachOther(const LoadedSlice &First,
11651                                      const LoadedSlice &Second) {
11652   assert(First.Origin == Second.Origin && First.Origin &&
11653          "Unable to match different memory origins.");
11654   APInt UsedBits = First.getUsedBits();
11655   assert((UsedBits & Second.getUsedBits()) == 0 &&
11656          "Slices are not supposed to overlap.");
11657   UsedBits |= Second.getUsedBits();
11658   return areUsedBitsDense(UsedBits);
11659 }
11660 
/// \brief Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // Note: `First = Second` lives in the for-increment so it runs on every
  // iteration, including the ones that `continue` out of the body.
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                                                           First = Second) {

    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // A pair was formed: the pair costs one load fewer than two slices.
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
11722 
11723 /// \brief Check the profitability of all involved LoadedSlice.
11724 /// Currently, it is considered profitable if there is exactly two
11725 /// involved slices (1) which are (2) next to each other in memory, and
11726 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
11727 ///
11728 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
11729 /// the elements themselves.
11730 ///
11731 /// FIXME: When the cost model will be mature enough, we can relax
11732 /// constraints (1) and (2).
11733 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11734                                 const APInt &UsedBits, bool ForCodeSize) {
11735   unsigned NumberOfSlices = LoadedSlices.size();
11736   if (StressLoadSlicing)
11737     return NumberOfSlices > 1;
11738 
11739   // Check (1).
11740   if (NumberOfSlices != 2)
11741     return false;
11742 
11743   // Check (2).
11744   if (!areUsedBitsDense(UsedBits))
11745     return false;
11746 
11747   // Check (3).
11748   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
11749   // The original code has one big load.
11750   OrigCost.Loads = 1;
11751   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
11752     const LoadedSlice &LS = LoadedSlices[CurrSlice];
11753     // Accumulate the cost of all the slices.
11754     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
11755     GlobalSlicingCost += SliceCost;
11756 
11757     // Account as cost in the original configuration the gain obtained
11758     // with the current slices.
11759     OrigCost.addSliceGain(LS);
11760   }
11761 
11762   // If the target supports paired load, adjust the cost accordingly.
11763   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
11764   return OrigCost > GlobalSlicingCost;
11765 }
11766 
11767 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
11768 /// operations, split it in the various pieces being extracted.
11769 ///
11770 /// This sort of thing is introduced by SROA.
11771 /// This slicing takes care not to insert overlapping loads.
11772 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
11773 bool DAGCombiner::SliceUpLoad(SDNode *N) {
11774   if (Level < AfterLegalizeDAG)
11775     return false;
11776 
11777   LoadSDNode *LD = cast<LoadSDNode>(N);
11778   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
11779       !LD->getValueType(0).isInteger())
11780     return false;
11781 
11782   // Keep track of already used bits to detect overlapping values.
11783   // In that case, we will just abort the transformation.
11784   APInt UsedBits(LD->getValueSizeInBits(0), 0);
11785 
11786   SmallVector<LoadedSlice, 4> LoadedSlices;
11787 
11788   // Check if this load is used as several smaller chunks of bits.
11789   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
11790   // of computation for each trunc.
11791   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
11792        UI != UIEnd; ++UI) {
11793     // Skip the uses of the chain.
11794     if (UI.getUse().getResNo() != 0)
11795       continue;
11796 
11797     SDNode *User = *UI;
11798     unsigned Shift = 0;
11799 
11800     // Check if this is a trunc(lshr).
11801     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
11802         isa<ConstantSDNode>(User->getOperand(1))) {
11803       Shift = User->getConstantOperandVal(1);
11804       User = *User->use_begin();
11805     }
11806 
11807     // At this point, User is a Truncate, iff we encountered, trunc or
11808     // trunc(lshr).
11809     if (User->getOpcode() != ISD::TRUNCATE)
11810       return false;
11811 
11812     // The width of the type must be a power of 2 and greater than 8-bits.
11813     // Otherwise the load cannot be represented in LLVM IR.
11814     // Moreover, if we shifted with a non-8-bits multiple, the slice
11815     // will be across several bytes. We do not support that.
11816     unsigned Width = User->getValueSizeInBits(0);
11817     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
11818       return 0;
11819 
11820     // Build the slice for this chain of computations.
11821     LoadedSlice LS(User, LD, Shift, &DAG);
11822     APInt CurrentUsedBits = LS.getUsedBits();
11823 
11824     // Check if this slice overlaps with another.
11825     if ((CurrentUsedBits & UsedBits) != 0)
11826       return false;
11827     // Update the bits used globally.
11828     UsedBits |= CurrentUsedBits;
11829 
11830     // Check if the new slice would be legal.
11831     if (!LS.isLegal())
11832       return false;
11833 
11834     // Record the slice.
11835     LoadedSlices.push_back(LS);
11836   }
11837 
11838   // Abort slicing if it does not seem to be profitable.
11839   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
11840     return false;
11841 
11842   ++SlicedLoads;
11843 
11844   // Rewrite each chain to use an independent load.
11845   // By construction, each chain can be represented by a unique load.
11846 
11847   // Prepare the argument for the new token factor for all the slices.
11848   SmallVector<SDValue, 8> ArgChains;
11849   for (SmallVectorImpl<LoadedSlice>::const_iterator
11850            LSIt = LoadedSlices.begin(),
11851            LSItEnd = LoadedSlices.end();
11852        LSIt != LSItEnd; ++LSIt) {
11853     SDValue SliceInst = LSIt->loadSlice();
11854     CombineTo(LSIt->Inst, SliceInst, true);
11855     if (SliceInst.getOpcode() != ISD::LOAD)
11856       SliceInst = SliceInst.getOperand(0);
11857     assert(SliceInst->getOpcode() == ISD::LOAD &&
11858            "It takes more than a zext to get to the loaded slice!!");
11859     ArgChains.push_back(SliceInst.getValue(1));
11860   }
11861 
11862   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
11863                               ArgChains);
11864   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11865   AddToWorklist(Chain.getNode());
11866   return true;
11867 }
11868 
/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
/// \returns (MaskedBytes, ByteShift); (0, 0) means "no match".
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    // Scan the token factor's operands for the load.
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  // Size (in bytes) of the contiguous region being cleared by the AND.
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  // Success: report the size and byte offset of the cleared region.
  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}
11942 
11943 
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
/// \p MaskInfo is (NumBytes, ByteShift) as produced by CheckForMaskedLoad.
/// \returns the new store node, or null when the replacement is not possible.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On little endian the byte offset equals the shift; on big endian it is
  // counted from the other end of the stored value.
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    // Offsetting the pointer may reduce the known alignment.
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign)
      .getNode();
}
12003 
12004 
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only plain single-use scalar stores are candidates.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  // The op must consume the loaded value directly, and the store must be
  // chained immediately on the load.
  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    // Load and store must hit the same address in the same address space.
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, invert the mask so that (as with OR/XOR) set bits in Imm mark
    // the bits actually being changed.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // A no-op (or all-bits) operation is not worth narrowing.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    // [ShAmt, MSB] is the bit range modified by the op.
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    // Start with the smallest power-of-2 width covering the modified range.
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    // Only proceed when every modified bit fits in the NewBW-wide chunk
    // starting at ShAmt.
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the earlier inversion for AND so we emit the real constant.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // Bail out if the narrowed access would be under-aligned for its type.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrow load / op / store sequence.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Chain users of the old wide load now follow the narrow load.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
12130 
12131 /// For a given floating point load / store pair, if the load value isn't used
12132 /// by any other operations, then consider transforming the pair to integer
12133 /// load / store operations if the target deems the transformation profitable.
12134 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
12135   StoreSDNode *ST  = cast<StoreSDNode>(N);
12136   SDValue Chain = ST->getChain();
12137   SDValue Value = ST->getValue();
12138   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
12139       Value.hasOneUse() &&
12140       Chain == SDValue(Value.getNode(), 1)) {
12141     LoadSDNode *LD = cast<LoadSDNode>(Value);
12142     EVT VT = LD->getMemoryVT();
12143     if (!VT.isFloatingPoint() ||
12144         VT != ST->getMemoryVT() ||
12145         LD->isNonTemporal() ||
12146         ST->isNonTemporal() ||
12147         LD->getPointerInfo().getAddrSpace() != 0 ||
12148         ST->getPointerInfo().getAddrSpace() != 0)
12149       return SDValue();
12150 
12151     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
12152     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
12153         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
12154         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
12155         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
12156       return SDValue();
12157 
12158     unsigned LDAlign = LD->getAlignment();
12159     unsigned STAlign = ST->getAlignment();
12160     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
12161     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
12162     if (LDAlign < ABIAlign || STAlign < ABIAlign)
12163       return SDValue();
12164 
12165     SDValue NewLD =
12166         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
12167                     LD->getPointerInfo(), LDAlign);
12168 
12169     SDValue NewST =
12170         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
12171                      ST->getPointerInfo(), STAlign);
12172 
12173     AddToWorklist(NewLD.getNode());
12174     AddToWorklist(NewST.getNode());
12175     WorklistRemover DeadNodes(*this);
12176     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
12177     ++LdStFP2Int;
12178     return NewST;
12179   }
12180 
12181   return SDValue();
12182 }
12183 
12184 // This is a helper function for visitMUL to check the profitability
12185 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
12186 // MulNode is the original multiply, AddNode is (add x, c1),
12187 // and ConstNode is c2.
12188 //
12189 // If the (add x, c1) has multiple uses, we could increase
12190 // the number of adds if we make this transformation.
12191 // It would only be worth doing this if we can remove a
12192 // multiply in the process. Check for that here.
12193 // To illustrate:
12194 //     (A + c1) * c3
12195 //     (A + c2) * c3
12196 // We're checking for cases where we have common "c3 * A" expressions.
12197 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
12198                                               SDValue &AddNode,
12199                                               SDValue &ConstNode) {
12200   APInt Val;
12201 
12202   // If the add only has one use, this would be OK to do.
12203   if (AddNode.getNode()->hasOneUse())
12204     return true;
12205 
12206   // Walk all the users of the constant with which we're multiplying.
12207   for (SDNode *Use : ConstNode->uses()) {
12208 
12209     if (Use == MulNode) // This use is the one we're on right now. Skip it.
12210       continue;
12211 
12212     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
12213       SDNode *OtherOp;
12214       SDNode *MulVar = AddNode.getOperand(0).getNode();
12215 
12216       // OtherOp is what we're multiplying against the constant.
12217       if (Use->getOperand(0) == ConstNode)
12218         OtherOp = Use->getOperand(1).getNode();
12219       else
12220         OtherOp = Use->getOperand(0).getNode();
12221 
12222       // Check to see if multiply is with the same operand of our "add".
12223       //
12224       //     ConstNode  = CONST
12225       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
12226       //     ...
12227       //     AddNode  = (A + c1)  <-- MulVar is A.
12228       //         = AddNode * ConstNode   <-- current visiting instruction.
12229       //
12230       // If we make this transformation, we will have a common
12231       // multiply (ConstNode * A) that we can save.
12232       if (OtherOp == MulVar)
12233         return true;
12234 
12235       // Now check to see if a future expansion will give us a common
12236       // multiply.
12237       //
12238       //     ConstNode  = CONST
12239       //     AddNode    = (A + c1)
12240       //     ...   = AddNode * ConstNode <-- current visiting instruction.
12241       //     ...
12242       //     OtherOp = (A + c2)
12243       //     Use     = OtherOp * ConstNode <-- visiting Use.
12244       //
12245       // If we make this transformation, we will have a common
12246       // multiply (CONST * A) after we also do the same transformation
12247       // to the "t2" instruction.
12248       if (OtherOp->getOpcode() == ISD::ADD &&
12249           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
12250           OtherOp->getOperand(0).getNode() == MulVar)
12251         return true;
12252     }
12253   }
12254 
12255   // Didn't find a case where this would be profitable.
12256   return false;
12257 }
12258 
12259 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
12260                                          unsigned NumStores) {
12261   SmallVector<SDValue, 8> Chains;
12262   SmallPtrSet<const SDNode *, 8> Visited;
12263   SDLoc StoreDL(StoreNodes[0].MemNode);
12264 
12265   for (unsigned i = 0; i < NumStores; ++i) {
12266     Visited.insert(StoreNodes[i].MemNode);
12267   }
12268 
12269   // don't include nodes that are children
12270   for (unsigned i = 0; i < NumStores; ++i) {
12271     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
12272       Chains.push_back(StoreNodes[i].MemNode->getChain());
12273   }
12274 
12275   assert(Chains.size() > 0 && "Chain should have generated a chain");
12276   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
12277 }
12278 
/// Merge the first \p NumStores stores in \p StoreNodes into one wide store.
/// \param IsConstantSrc  all stored values are constants.
/// \param UseVector      build the merged value as a vector (otherwise as a
///                       single wide integer constant).
/// \param UseTrunc       emit the merged value through a truncating store.
/// \returns true when the stores were replaced.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;

  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);

  // StoredVal becomes the single wide value replacing the stored values.
  SDValue StoredVal;
  if (UseVector) {
    bool IsVec = MemVT.isVector();
    unsigned Elts = NumStores;
    if (IsVec) {
      // When merging vector stores, get the total number of elements.
      Elts *= MemVT.getVectorNumElements();
    }
    // Get the type for the merged vector store.
    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
    assert(TLI.isTypeLegal(Ty) && "Illegal vector store");

    if (IsConstantSrc) {
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // FP constants feeding an integer-typed merge are bitcast to int.
        // NOTE(review): the (uint32_t) truncation below looks like it assumes
        // 32-bit (f32) elements -- confirm for wider FP element types.
        if (MemVT.getScalarType().isInteger())
          if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
            Val = DAG.getConstant(
                (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
                SDLoc(CFP), MemVT);
        BuildVector.push_back(Val);
      }
      StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = St->getValue();
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
        if (Val.getValueType() != MemVT)
          return false;
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
                              DL, Ty, Ops);    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // On little endian the lowest-indexed store holds the least significant
      // bytes, so visit the stores in reverse to build the wide constant
      // most-significant-first.
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      StoreInt <<= ElementSizeBytes * 8;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (UseVector || !UseTrunc) {
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    // Hold the wide constant in the legalized (wider) type and truncate on
    // the way to memory.
    EVT LegalizedStoredValueTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValueTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}
12392 
12393 void DAGCombiner::getStoreMergeCandidates(
12394     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
12395   // This holds the base pointer, index, and the offset in bytes from the base
12396   // pointer.
12397   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
12398   EVT MemVT = St->getMemoryVT();
12399 
12400   // We must have a base and an offset.
12401   if (!BasePtr.getBase().getNode())
12402     return;
12403 
12404   // Do not handle stores to undef base pointers.
12405   if (BasePtr.getBase().isUndef())
12406     return;
12407 
12408   bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
12409                        isa<ConstantFPSDNode>(St->getValue());
12410   bool IsExtractVecSrc =
12411       (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12412        St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
12413   bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
12414   BaseIndexOffset LBasePtr;
12415   // Match on loadbaseptr if relevant.
12416   if (IsLoadSrc)
12417     LBasePtr =
12418         BaseIndexOffset::match(cast<LoadSDNode>(St->getValue())->getBasePtr());
12419 
12420   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
12421                             int64_t &Offset) -> bool {
12422     if (Other->isVolatile() || Other->isIndexed())
12423       return false;
12424     // We can merge constant floats to equivalent integers
12425     if (Other->getMemoryVT() != MemVT)
12426       if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
12427             isa<ConstantFPSDNode>(Other->getValue())))
12428         return false;
12429     if (IsLoadSrc) {
12430       // The Load's Base Ptr must also match
12431       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
12432         auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr());
12433         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
12434           return false;
12435       } else
12436         return false;
12437     }
12438     if (IsConstantSrc)
12439       if (!(isa<ConstantSDNode>(Other->getValue()) ||
12440             isa<ConstantFPSDNode>(Other->getValue())))
12441         return false;
12442     if (IsExtractVecSrc)
12443       if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12444             Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
12445         return false;
12446     Ptr = BaseIndexOffset::match(Other->getBasePtr());
12447     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
12448   };
12449   // We looking for a root node which is an ancestor to all mergable
12450   // stores. We search up through a load, to our root and then down
12451   // through all children. For instance we will find Store{1,2,3} if
12452   // St is Store1, Store2. or Store3 where the root is not a load
12453   // which always true for nonvolatile ops. TODO: Expand
12454   // the search to find all valid candidates through multiple layers of loads.
12455   //
12456   // Root
12457   // |-------|-------|
12458   // Load    Load    Store3
12459   // |       |
12460   // Store1   Store2
12461   //
12462   // FIXME: We should be able to climb and
12463   // descend TokenFactors to find candidates as well.
12464 
12465   SDNode *RootNode = (St->getChain()).getNode();
12466 
12467   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
12468     RootNode = Ldn->getChain().getNode();
12469     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
12470       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
12471         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
12472           if (I2.getOperandNo() == 0)
12473             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
12474               BaseIndexOffset Ptr;
12475               int64_t PtrDiff;
12476               if (CandidateMatch(OtherST, Ptr, PtrDiff))
12477                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
12478             }
12479   } else
12480     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
12481       if (I.getOperandNo() == 0)
12482         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
12483           BaseIndexOffset Ptr;
12484           int64_t PtrDiff;
12485           if (CandidateMatch(OtherST, Ptr, PtrDiff))
12486             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
12487         }
12488 }
12489 
12490 // We need to check that merging these stores does not cause a loop
12491 // in the DAG. Any store candidate may depend on another candidate
12492 // indirectly through its operand (we already consider dependencies
12493 // through the chain). Check in parallel by searching up from
12494 // non-chain operands of candidates.
12495 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
12496     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
12497   SmallPtrSet<const SDNode *, 16> Visited;
12498   SmallVector<const SDNode *, 8> Worklist;
12499   // search ops of store candidates
12500   for (unsigned i = 0; i < NumStores; ++i) {
12501     SDNode *n = StoreNodes[i].MemNode;
12502     // Potential loops may happen only through non-chain operands
12503     for (unsigned j = 1; j < n->getNumOperands(); ++j)
12504       Worklist.push_back(n->getOperand(j).getNode());
12505   }
12506   // search through DAG. We can stop early if we find a storenode
12507   for (unsigned i = 0; i < NumStores; ++i) {
12508     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
12509       return false;
12510   }
12511   return true;
12512 }
12513 
// Try to merge runs of consecutive stores rooted near St into fewer, wider
// stores. Three source kinds are handled: constant values, values extracted
// from vectors, and values produced by consecutive loads. Returns true if
// any stores were merged.
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
  // Store merging is purely an optimization; skip it entirely at -O0.
  if (OptLevel == CodeGenOpt::None)
    return false;

  EVT MemVT = St->getMemoryVT();
  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;

  // Merging even two stores would already exceed the widest legal store.
  if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
    return false;

  // noimplicitfloat forbids introducing vector (FP-register) stores below.
  bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
      Attribute::NoImplicitFloat);

  // This function cannot currently deal with non-byte-sized memory sizes.
  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
    return false;

  if (!MemVT.isSimple())
    return false;

  // Perform an early exit check. Do not bother looking at stored values that
  // are not constants, loads, or extracted vector elements.
  SDValue StoredVal = St->getValue();
  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
                       isa<ConstantFPSDNode>(StoredVal);
  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);

  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
    return false;

  // Don't merge vectors into wider vectors if the source data comes from loads.
  // TODO: This restriction can be lifted by using logic similar to the
  // ExtractVecSrc case.
  if (MemVT.isVector() && IsLoadSrc)
    return false;

  SmallVector<MemOpLink, 8> StoreNodes;
  // Find potential store merge candidates by searching through chain sub-DAG
  getStoreMergeCandidates(St, StoreNodes);

  // Check if there is anything to merge.
  if (StoreNodes.size() < 2)
    return false;

  // Sort the memory operands according to their distance from the
  // base pointer.
  std::sort(StoreNodes.begin(), StoreNodes.end(),
            [](MemOpLink LHS, MemOpLink RHS) {
              return LHS.OffsetFromBase < RHS.OffsetFromBase;
            });

  // Store Merge attempts to merge the lowest stores. This generally
  // works out as if successful, as the remaining stores are checked
  // after the first collection of stores is merged. However, in the
  // case that a non-mergeable store is found first, e.g., {p[-2],
  // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
  // mergeable cases. To prevent this, we prune such stores from the
  // front of StoreNodes here.

  // RV accumulates whether any merge succeeded across all iterations.
  bool RV = false;
  while (StoreNodes.size() > 1) {
    // Skip leading candidates that are not followed by an adjacent store.
    unsigned StartIdx = 0;
    while ((StartIdx + 1 < StoreNodes.size()) &&
           StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
               StoreNodes[StartIdx + 1].OffsetFromBase)
      ++StartIdx;

    // Bail if we don't have enough candidates to merge.
    if (StartIdx + 1 >= StoreNodes.size())
      return RV;

    if (StartIdx)
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);

    // Scan the memory operations on the chain and find the first
    // non-consecutive store memory address.
    unsigned NumConsecutiveStores = 1;
    int64_t StartAddress = StoreNodes[0].OffsetFromBase;
    // Check that the addresses are consecutive starting from the second
    // element in the list of stores.
    for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
      NumConsecutiveStores = i + 1;
    }

    if (NumConsecutiveStores < 2) {
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumConsecutiveStores);
      continue;
    }

    // Check that we can merge these candidates without causing a cycle
    if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
                                                  NumConsecutiveStores)) {
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumConsecutiveStores);
      continue;
    }

    // The node with the lowest store address.
    LLVMContext &Context = *DAG.getContext();
    const DataLayout &DL = DAG.getDataLayout();

    // Store the constants into memory as one consecutive store.
    if (IsConstantSrc) {
      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
      unsigned FirstStoreAlign = FirstInChain->getAlignment();
      // Largest prefix (in element count) that is legal as a single integer
      // store, and as a single vector store, respectively.
      unsigned LastLegalType = 1;
      unsigned LastLegalVectorType = 1;
      // Whether the best integer candidate must be emitted as a truncstore.
      bool LastIntegerTrunc = false;
      bool NonZero = false;
      for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
        StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue StoredVal = ST->getValue();

        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
          NonZero |= !C->isNullValue();
        } else if (ConstantFPSDNode *C =
                       dyn_cast<ConstantFPSDNode>(StoredVal)) {
          NonZero |= !C->getConstantFPValue()->isNullValue();
        } else {
          // Non-constant.
          break;
        }

        // Find a legal type for the constant store.
        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
        EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
        bool IsFast = false;
        if (TLI.isTypeLegal(StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, StoreTy) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                   FirstStoreAlign, &IsFast) &&
            IsFast) {
          LastIntegerTrunc = false;
          LastLegalType = i + 1;
          // Or check whether a truncstore is legal.
        } else if (TLI.getTypeAction(Context, StoreTy) ==
                   TargetLowering::TypePromoteInteger) {
          EVT LegalizedStoredValueTy =
              TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
          if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) &&
              TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
                                     FirstStoreAS, FirstStoreAlign, &IsFast) &&
              IsFast) {
            LastIntegerTrunc = true;
            LastLegalType = i + 1;
          }
        }

        // We only use vectors if the constant is known to be zero or the target
        // allows it and the function is not marked with the noimplicitfloat
        // attribute.
        if ((!NonZero ||
             TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
            !NoVectors) {
          // Find a legal type for the vector store.
          EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
          if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) &&
              TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                     FirstStoreAlign, &IsFast) &&
              IsFast)
            LastLegalVectorType = i + 1;
        }
      }

      // Check if we found a legal integer type that creates a meaningful merge.
      if (LastLegalType < 2 && LastLegalVectorType < 2) {
        // Drop only the front store and retry with the remainder.
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
        continue;
      }

      bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
      unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;

      bool Merged = MergeStoresOfConstantsOrVecElts(
          StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
      if (!Merged) {
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
        continue;
      }
      // Remove merged stores for next iteration.
      RV = true;
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
      continue;
    }

    // When extracting multiple vector elements, try to store them
    // in one vector store rather than a sequence of scalar stores.
    if (IsExtractVecSrc) {
      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
      unsigned FirstStoreAlign = FirstInChain->getAlignment();
      unsigned NumStoresToMerge = 1;
      bool IsVec = MemVT.isVector();
      for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        unsigned StoreValOpcode = St->getValue().getOpcode();
        // This restriction could be loosened.
        // Bail out if any stored values are not elements extracted from a
        // vector. It should be possible to handle mixed sources, but load
        // sources need more careful handling (see the block of code below that
        // handles consecutive loads).
        if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
            StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
          return RV;

        // Find a legal type for the vector store.
        unsigned Elts = i + 1;
        if (IsVec) {
          // When merging vector stores, get the total number of elements.
          Elts *= MemVT.getVectorNumElements();
        }
        EVT Ty =
            EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
        bool IsFast;
        if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) &&
            TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                   FirstStoreAlign, &IsFast) &&
            IsFast)
          NumStoresToMerge = i + 1;
      }

      bool Merged = MergeStoresOfConstantsOrVecElts(
          StoreNodes, MemVT, NumStoresToMerge, false, true, false);
      if (!Merged) {
        StoreNodes.erase(StoreNodes.begin(),
                         StoreNodes.begin() + NumStoresToMerge);
        continue;
      }
      // Remove merged stores for next iteration.
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumStoresToMerge);
      RV = true;
      continue;
    }

    // Below we handle the case of multiple consecutive stores that
    // come from multiple consecutive loads. We merge them into a single
    // wide load and a single wide store.

    // Look for load nodes which are used by the stored values.
    SmallVector<MemOpLink, 8> LoadNodes;

    // Find acceptable loads. Loads need to have the same chain (token factor),
    // must not be zext, volatile, indexed, and they must be consecutive.
    BaseIndexOffset LdBasePtr;
    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
      if (!Ld)
        break;

      // Loads must only have one use.
      if (!Ld->hasNUsesOfValue(1, 0))
        break;

      // The memory operands must not be volatile.
      if (Ld->isVolatile() || Ld->isIndexed())
        break;

      // We do not accept ext loads.
      if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
        break;

      // The stored memory type must be the same.
      if (Ld->getMemoryVT() != MemVT)
        break;

      BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
      // If this is not the first ptr that we check.
      int64_t LdOffset = 0;
      if (LdBasePtr.getBase().getNode()) {
        // The base ptr must be the same.
        if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
          break;
      } else {
        // Check that all other base pointers are the same as this one.
        LdBasePtr = LdPtr;
      }

      // We found a potential memory operand to merge.
      LoadNodes.push_back(MemOpLink(Ld, LdOffset));
    }

    if (LoadNodes.size() < 2) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
      continue;
    }

    // If we have load/store pair instructions and we only have two values,
    // don't bother merging.
    unsigned RequiredAlignment;
    if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
        StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
      continue;
    }
    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    unsigned FirstStoreAlign = FirstInChain->getAlignment();
    LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
    unsigned FirstLoadAS = FirstLoad->getAddressSpace();
    unsigned FirstLoadAlign = FirstLoad->getAlignment();

    // Scan the memory operations on the chain and find the first
    // non-consecutive load memory address. These variables hold the index in
    // the store node array.
    unsigned LastConsecutiveLoad = 1;
    // This variable refers to the size and not index in the array.
    unsigned LastLegalVectorType = 1;
    unsigned LastLegalIntegerType = 1;
    bool DoIntegerTruncate = false;
    StartAddress = LoadNodes[0].OffsetFromBase;
    SDValue FirstChain = FirstLoad->getChain();
    for (unsigned i = 1; i < LoadNodes.size(); ++i) {
      // All loads must share the same chain.
      if (LoadNodes[i].MemNode->getChain() != FirstChain)
        break;

      int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
      LastConsecutiveLoad = i;
      // Find a legal type for the vector store.
      EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
      // Both the load and the store access must be legal and fast.
      bool IsFastSt, IsFastLd;
      if (TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                 FirstStoreAlign, &IsFastSt) &&
          IsFastSt &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                 FirstLoadAlign, &IsFastLd) &&
          IsFastLd) {
        LastLegalVectorType = i + 1;
      }

      // Find a legal type for the integer store.
      unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
      StoreTy = EVT::getIntegerVT(Context, SizeInBits);
      if (TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                 FirstStoreAlign, &IsFastSt) &&
          IsFastSt &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                 FirstLoadAlign, &IsFastLd) &&
          IsFastLd) {
        LastLegalIntegerType = i + 1;
        DoIntegerTruncate = false;
        // Or check whether a truncstore and extload is legal.
      } else if (TLI.getTypeAction(Context, StoreTy) ==
                 TargetLowering::TypePromoteInteger) {
        EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
        if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) &&
            TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
                               StoreTy) &&
            TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
                               StoreTy) &&
            TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
            TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
                                   FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
            IsFastSt &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                   FirstLoadAlign, &IsFastLd) &&
            IsFastLd) {
          LastLegalIntegerType = i + 1;
          DoIntegerTruncate = true;
        }
      }
    }

    // Only use vector types if the vector type is larger than the integer type.
    // If they are the same, use integers.
    bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
    unsigned LastLegalType =
        std::max(LastLegalVectorType, LastLegalIntegerType);

    // We add +1 here because the LastXXX variables refer to location while
    // the NumElem refers to array/index size.
    unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
    NumElem = std::min(LastLegalType, NumElem);

    if (NumElem < 2) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
      continue;
    }

    // Find if it is better to use vectors or integers to load and store
    // to memory.
    EVT JointMemOpVT;
    if (UseVectorTy) {
      JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
    } else {
      unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
      JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
    }

    SDLoc LoadDL(LoadNodes[0].MemNode);
    SDLoc StoreDL(StoreNodes[0].MemNode);

    // The merged loads are required to have the same incoming chain, so
    // using the first's chain is acceptable.

    SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
    AddToWorklist(NewStoreChain.getNode());

    SDValue NewLoad, NewStore;
    if (UseVectorTy || !DoIntegerTruncate) {
      NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
                            FirstLoad->getBasePtr(),
                            FirstLoad->getPointerInfo(), FirstLoadAlign);
      NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
                              FirstInChain->getBasePtr(),
                              FirstInChain->getPointerInfo(), FirstStoreAlign);
    } else { // This must be the truncstore/extload case
      EVT ExtendedTy =
          TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
      NewLoad =
          DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
                         JointMemOpVT, FirstLoadAlign);
      NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
                                   FirstInChain->getBasePtr(),
                                   FirstInChain->getPointerInfo(), JointMemOpVT,
                                   FirstInChain->getAlignment(),
                                   FirstInChain->getMemOperand()->getFlags());
    }

    // Transfer chain users from old loads to the new load.
    for (unsigned i = 0; i < NumElem; ++i) {
      LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
                                    SDValue(NewLoad.getNode(), 1));
    }

    // Replace the all stores with the new store.
    for (unsigned i = 0; i < NumElem; ++i)
      CombineTo(StoreNodes[i].MemNode, NewStore);
    RV = true;
    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
    continue;
  }
  return RV;
}
12967 
12968 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
12969   SDLoc SL(ST);
12970   SDValue ReplStore;
12971 
12972   // Replace the chain to avoid dependency.
12973   if (ST->isTruncatingStore()) {
12974     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
12975                                   ST->getBasePtr(), ST->getMemoryVT(),
12976                                   ST->getMemOperand());
12977   } else {
12978     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
12979                              ST->getMemOperand());
12980   }
12981 
12982   // Create token to keep both nodes around.
12983   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
12984                               MVT::Other, ST->getChain(), ReplStore);
12985 
12986   // Make sure the new and old chains are cleaned up.
12987   AddToWorklist(Token.getNode());
12988 
12989   // Don't add users to work list.
12990   return CombineTo(ST, Token, false);
12991 }
12992 
12993 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
12994   SDValue Value = ST->getValue();
12995   if (Value.getOpcode() == ISD::TargetConstantFP)
12996     return SDValue();
12997 
12998   SDLoc DL(ST);
12999 
13000   SDValue Chain = ST->getChain();
13001   SDValue Ptr = ST->getBasePtr();
13002 
13003   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
13004 
13005   // NOTE: If the original store is volatile, this transform must not increase
13006   // the number of stores.  For example, on x86-32 an f64 can be stored in one
13007   // processor operation but an i64 (which is not legal) requires two.  So the
13008   // transform should not be done in this case.
13009 
13010   SDValue Tmp;
13011   switch (CFP->getSimpleValueType(0).SimpleTy) {
13012   default:
13013     llvm_unreachable("Unknown FP type");
13014   case MVT::f16:    // We don't do this for these yet.
13015   case MVT::f80:
13016   case MVT::f128:
13017   case MVT::ppcf128:
13018     return SDValue();
13019   case MVT::f32:
13020     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
13021         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13022       ;
13023       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
13024                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
13025                             MVT::i32);
13026       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
13027     }
13028 
13029     return SDValue();
13030   case MVT::f64:
13031     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
13032          !ST->isVolatile()) ||
13033         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
13034       ;
13035       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
13036                             getZExtValue(), SDLoc(CFP), MVT::i64);
13037       return DAG.getStore(Chain, DL, Tmp,
13038                           Ptr, ST->getMemOperand());
13039     }
13040 
13041     if (!ST->isVolatile() &&
13042         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13043       // Many FP stores are not made apparent until after legalize, e.g. for
13044       // argument passing.  Since this is so common, custom legalize the
13045       // 64-bit integer store into two 32-bit stores.
13046       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
13047       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
13048       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
13049       if (DAG.getDataLayout().isBigEndian())
13050         std::swap(Lo, Hi);
13051 
13052       unsigned Alignment = ST->getAlignment();
13053       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13054       AAMDNodes AAInfo = ST->getAAInfo();
13055 
13056       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13057                                  ST->getAlignment(), MMOFlags, AAInfo);
13058       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13059                         DAG.getConstant(4, DL, Ptr.getValueType()));
13060       Alignment = MinAlign(Alignment, 4U);
13061       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
13062                                  ST->getPointerInfo().getWithOffset(4),
13063                                  Alignment, MMOFlags, AAInfo);
13064       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13065                          St0, St1);
13066     }
13067 
13068     return SDValue();
13069   }
13070 }
13071 
// Visit a STORE node and try the store-specific combines implemented below:
// store-of-bitcast elimination, dead/no-op store removal, truncating-store
// narrowing, FP->int load/store pair conversion, chain improvement,
// consecutive-store merging, indexed-store formation, FP-constant store
// replacement and merged-value store splitting.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // Folding is allowed either because we are before legalization (and the
    // store is not volatile) or because the target supports a store of the
    // bitcast source type directly.
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      // Only fold when the target says the access at the original alignment
      // is both allowed and fast for the new type.
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                            ST->getPointerInfo(), OrigAlign,
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment()) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // Only replace if CSE did not hand back the very same node.
        if (NewStore.getNode() != N)
          return CombineTo(ST, NewStore, true);
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handles all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // The store's chain may have been updated in place; re-read it.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter = GetDemandedBits(
        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                    ST->getMemoryVT().getScalarSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(
            Value,
            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                 ST->getMemoryVT().getScalarSizeInBits()))) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
        ST->getMemoryVT() == ST1->getMemoryVT()) {
      // If this is a store followed by a store with the same value to the same
      // location, then the store is dead/noop.
      if (ST1->getValue() == Value) {
        // The store is dead, remove it.
        return Chain;
      }

      // If this store fully overwrites a preceding store to the same location
      // and no other node is chained to that store, we can effectively drop
      // the earlier store. Do not remove stores to undef as they may be used
      // as data sinks.
      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        // ST1 is fully overwritten and can be elided. Combine with its chain
        // value.
        CombineTo(ST1, ST1->getChain());
        return SDValue();
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG
                                           : !LegalTypes) {
    for (;;) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  // Split a store of a bitwise-merged value into two narrower stores if the
  // target prefers that.
  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  return ReduceLoadOpStoreWidth(N);
}
13246 
13247 /// For the instruction sequence of store below, F and I values
13248 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
13250 /// which can remove the bitwise instructions or sink them to colder places.
13251 ///
13252 ///   (store (or (zext (bitcast F to i32) to i64),
13253 ///              (shl (zext I to i64), 32)), addr)  -->
13254 ///   (store F, addr) and (store I, addr+4)
13255 ///
13256 /// Similarly, splitting for other merged store can also be beneficial, like:
13257 /// For pair of {i32, i32}, i64 store --> two i32 stores.
13258 /// For pair of {i32, i16}, i64 store --> two i32 stores.
13259 /// For pair of {i16, i16}, i32 store --> two i16 stores.
13260 /// For pair of {i16, i8},  i32 store --> two i16 stores.
13261 /// For pair of {i8, i8},   i16 store --> two i8 stores.
13262 ///
13263 /// We allow each target to determine specifically which kind of splitting is
13264 /// supported.
13265 ///
13266 /// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is sroa transformed before being inlined into hoo.
13268 ///   void goo(const std::pair<int, float> &);
13269 ///   hoo() {
13270 ///     ...
13271 ///     goo(std::make_pair(tmp, ftmp));
13272 ///     ...
13273 ///   }
13274 ///
13275 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
13276   if (OptLevel == CodeGenOpt::None)
13277     return SDValue();
13278 
13279   SDValue Val = ST->getValue();
13280   SDLoc DL(ST);
13281 
13282   // Match OR operand.
13283   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
13284     return SDValue();
13285 
13286   // Match SHL operand and get Lower and Higher parts of Val.
13287   SDValue Op1 = Val.getOperand(0);
13288   SDValue Op2 = Val.getOperand(1);
13289   SDValue Lo, Hi;
13290   if (Op1.getOpcode() != ISD::SHL) {
13291     std::swap(Op1, Op2);
13292     if (Op1.getOpcode() != ISD::SHL)
13293       return SDValue();
13294   }
13295   Lo = Op2;
13296   Hi = Op1.getOperand(0);
13297   if (!Op1.hasOneUse())
13298     return SDValue();
13299 
13300   // Match shift amount to HalfValBitSize.
13301   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
13302   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
13303   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
13304     return SDValue();
13305 
13306   // Lo and Hi are zero-extended from int with size less equal than 32
13307   // to i64.
13308   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
13309       !Lo.getOperand(0).getValueType().isScalarInteger() ||
13310       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
13311       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
13312       !Hi.getOperand(0).getValueType().isScalarInteger() ||
13313       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
13314     return SDValue();
13315 
13316   // Use the EVT of low and high parts before bitcast as the input
13317   // of target query.
13318   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
13319                   ? Lo.getOperand(0).getValueType()
13320                   : Lo.getValueType();
13321   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
13322                    ? Hi.getOperand(0).getValueType()
13323                    : Hi.getValueType();
13324   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
13325     return SDValue();
13326 
13327   // Start to split store.
13328   unsigned Alignment = ST->getAlignment();
13329   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13330   AAMDNodes AAInfo = ST->getAAInfo();
13331 
13332   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
13333   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
13334   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
13335   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
13336 
13337   SDValue Chain = ST->getChain();
13338   SDValue Ptr = ST->getBasePtr();
13339   // Lower value store.
13340   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13341                              ST->getAlignment(), MMOFlags, AAInfo);
13342   Ptr =
13343       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13344                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
13345   // Higher value store.
13346   SDValue St1 =
13347       DAG.getStore(St0, DL, Hi, Ptr,
13348                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
13349                    Alignment / 2, MMOFlags, AAInfo);
13350   return St1;
13351 }
13352 
13353 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
13354   SDValue InVec = N->getOperand(0);
13355   SDValue InVal = N->getOperand(1);
13356   SDValue EltNo = N->getOperand(2);
13357   SDLoc DL(N);
13358 
13359   // If the inserted element is an UNDEF, just use the input vector.
13360   if (InVal.isUndef())
13361     return InVec;
13362 
13363   EVT VT = InVec.getValueType();
13364 
13365   // Check that we know which element is being inserted
13366   if (!isa<ConstantSDNode>(EltNo))
13367     return SDValue();
13368   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13369 
13370   // Canonicalize insert_vector_elt dag nodes.
13371   // Example:
13372   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
13373   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
13374   //
13375   // Do this only if the child insert_vector node has one use; also
13376   // do this only if indices are both constants and Idx1 < Idx0.
13377   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
13378       && isa<ConstantSDNode>(InVec.getOperand(2))) {
13379     unsigned OtherElt = InVec.getConstantOperandVal(2);
13380     if (Elt < OtherElt) {
13381       // Swap nodes.
13382       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
13383                                   InVec.getOperand(0), InVal, EltNo);
13384       AddToWorklist(NewOp.getNode());
13385       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
13386                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
13387     }
13388   }
13389 
13390   // If we can't generate a legal BUILD_VECTOR, exit
13391   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
13392     return SDValue();
13393 
13394   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
13395   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
13396   // vector elements.
13397   SmallVector<SDValue, 8> Ops;
13398   // Do not combine these two vectors if the output vector will not replace
13399   // the input vector.
13400   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
13401     Ops.append(InVec.getNode()->op_begin(),
13402                InVec.getNode()->op_end());
13403   } else if (InVec.isUndef()) {
13404     unsigned NElts = VT.getVectorNumElements();
13405     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
13406   } else {
13407     return SDValue();
13408   }
13409 
13410   // Insert the element
13411   if (Elt < Ops.size()) {
13412     // All the operands of BUILD_VECTOR must have the same type;
13413     // we enforce that here.
13414     EVT OpVT = Ops[0].getValueType();
13415     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
13416   }
13417 
13418   // Return the new vector
13419   return DAG.getBuildVector(VT, DL, Ops);
13420 }
13421 
/// Replace an EXTRACT_VECTOR_ELT (\p EVE) of a vector load (\p OriginalLoad)
/// with a narrow scalar load of just the selected element, issuing an
/// extending load when EVE's result type is wider than the element type.
/// \p InVecVT is the type of the vector being extracted from and \p EltNo is
/// the element index (constant or variable). On success this performs the
/// use replacement itself and returns SDValue(EVE, 0); on failure it returns
/// an empty SDValue.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  // Callers must have filtered out volatile loads already.
  assert(!OriginalLoad->isVolatile());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Give up if the scalar load would require more alignment than the vector
  // load provides, or if scalar loads of this type are unsupported.
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
    ISD::NON_EXTLOAD : ISD::EXTLOAD;
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
    return SDValue();

  Align = NewAlign;

  // Compute the element's address: base + index * element size.
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: fold the byte offset and keep precise pointer info.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: compute the offset at run time; the pointer info can
    // only refer to the original load's base.
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
                          Align, OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Result is the same width or narrower: plain load, then truncate or
    // bitcast to the extract's result type.
    Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
                       MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
                       OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  // Replace both the extracted value and the original load's chain output.
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}
13504 
// Visit an EXTRACT_VECTOR_ELT node and try to replace it with a scalar:
// a scalar_to_vector/build_vector/insert_vector_elt operand, a truncated
// bitcast source, a shuffle source element, or a narrowed scalar load.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  // Extracting from an undef vector gives undef.
  if (InVec.isUndef())
    return DAG.getUNDEF(NVT);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

  // extract_vector_elt (build_vector x, y), 1 -> y
  if (ConstEltNo &&
      InVec.getOpcode() == ISD::BUILD_VECTOR &&
      TLI.isTypeLegal(VT) &&
      (InVec.hasOneUse() ||
       TLI.aggressivelyPreferBuildVectorSources(VT))) {
    SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (NVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
      ConstEltNo->isNullValue() && VT.isInteger()) {
    SDValue BCSrc = InVec.getOperand(0);
    if (BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
  }

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      EltNo == InVec.getOperand(2)) {
    SDValue Elt = InVec.getOperand(1);
    return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
  }

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    // If the selected operand is itself a BUILD_VECTOR, we can read the
    // scalar directly.
    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                         DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
    }
  }

  // Beyond this point we look through a bitcast of the input vector, so track
  // whether the bitcast changed the element count (which invalidates shuffle
  // mask indexing below).
  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  // Handles the variable-index case; requires the index not to depend on the
  // load (no cycle through the chain).
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
      if (!OrigLoad->isVolatile()) {
        return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                             OrigLoad);
      }
    }
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      unsigned NumElems = VT.getVectorNumElements();
      // NOTE(review): the guard uses '>' rather than '>=', so Elt ==
      // NumElems would still reach getMaskElt; presumably extract indices
      // are always in range here -- confirm.
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Rebase the index into the selected shuffle operand.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}
13715 
13716 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
13717 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
13718   // We perform this optimization post type-legalization because
13719   // the type-legalizer often scalarizes integer-promoted vectors.
13720   // Performing this optimization before may create bit-casts which
13721   // will be type-legalized to complex code sequences.
13722   // We perform this optimization only before the operation legalizer because we
13723   // may introduce illegal operations.
13724   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
13725     return SDValue();
13726 
13727   unsigned NumInScalars = N->getNumOperands();
13728   SDLoc DL(N);
13729   EVT VT = N->getValueType(0);
13730 
13731   // Check to see if this is a BUILD_VECTOR of a bunch of values
13732   // which come from any_extend or zero_extend nodes. If so, we can create
13733   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
13734   // optimizations. We do not handle sign-extend because we can't fill the sign
13735   // using shuffles.
13736   EVT SourceType = MVT::Other;
13737   bool AllAnyExt = true;
13738 
13739   for (unsigned i = 0; i != NumInScalars; ++i) {
13740     SDValue In = N->getOperand(i);
13741     // Ignore undef inputs.
13742     if (In.isUndef()) continue;
13743 
13744     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
13745     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
13746 
13747     // Abort if the element is not an extension.
13748     if (!ZeroExt && !AnyExt) {
13749       SourceType = MVT::Other;
13750       break;
13751     }
13752 
13753     // The input is a ZeroExt or AnyExt. Check the original type.
13754     EVT InTy = In.getOperand(0).getValueType();
13755 
13756     // Check that all of the widened source types are the same.
13757     if (SourceType == MVT::Other)
13758       // First time.
13759       SourceType = InTy;
13760     else if (InTy != SourceType) {
13761       // Multiple income types. Abort.
13762       SourceType = MVT::Other;
13763       break;
13764     }
13765 
13766     // Check if all of the extends are ANY_EXTENDs.
13767     AllAnyExt &= AnyExt;
13768   }
13769 
13770   // In order to have valid types, all of the inputs must be extended from the
13771   // same source type and all of the inputs must be any or zero extend.
13772   // Scalar sizes must be a power of two.
13773   EVT OutScalarTy = VT.getScalarType();
13774   bool ValidTypes = SourceType != MVT::Other &&
13775                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
13776                  isPowerOf2_32(SourceType.getSizeInBits());
13777 
13778   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
13779   // turn into a single shuffle instruction.
13780   if (!ValidTypes)
13781     return SDValue();
13782 
13783   bool isLE = DAG.getDataLayout().isLittleEndian();
13784   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
13785   assert(ElemRatio > 1 && "Invalid element size ratio");
13786   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
13787                                DAG.getConstant(0, DL, SourceType);
13788 
13789   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
13790   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
13791 
13792   // Populate the new build_vector
13793   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
13794     SDValue Cast = N->getOperand(i);
13795     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
13796             Cast.getOpcode() == ISD::ZERO_EXTEND ||
13797             Cast.isUndef()) && "Invalid cast opcode");
13798     SDValue In;
13799     if (Cast.isUndef())
13800       In = DAG.getUNDEF(SourceType);
13801     else
13802       In = Cast->getOperand(0);
13803     unsigned Index = isLE ? (i * ElemRatio) :
13804                             (i * ElemRatio + (ElemRatio - 1));
13805 
13806     assert(Index < Ops.size() && "Invalid index");
13807     Ops[Index] = In;
13808   }
13809 
13810   // The type of the new BUILD_VECTOR node.
13811   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
13812   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
13813          "Invalid vector size");
13814   // Check if the new vector type is legal.
13815   if (!isTypeLegal(VecVT)) return SDValue();
13816 
13817   // Make the new BUILD_VECTOR.
13818   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
13819 
13820   // The new BUILD_VECTOR node has the potential to be further optimized.
13821   AddToWorklist(BV.getNode());
13822   // Bitcast to the desired type.
13823   return DAG.getBitcast(VT, BV);
13824 }
13825 
13826 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
13827   EVT VT = N->getValueType(0);
13828 
13829   unsigned NumInScalars = N->getNumOperands();
13830   SDLoc DL(N);
13831 
13832   EVT SrcVT = MVT::Other;
13833   unsigned Opcode = ISD::DELETED_NODE;
13834   unsigned NumDefs = 0;
13835 
13836   for (unsigned i = 0; i != NumInScalars; ++i) {
13837     SDValue In = N->getOperand(i);
13838     unsigned Opc = In.getOpcode();
13839 
13840     if (Opc == ISD::UNDEF)
13841       continue;
13842 
13843     // If all scalar values are floats and converted from integers.
13844     if (Opcode == ISD::DELETED_NODE &&
13845         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
13846       Opcode = Opc;
13847     }
13848 
13849     if (Opc != Opcode)
13850       return SDValue();
13851 
13852     EVT InVT = In.getOperand(0).getValueType();
13853 
13854     // If all scalar values are typed differently, bail out. It's chosen to
13855     // simplify BUILD_VECTOR of integer types.
13856     if (SrcVT == MVT::Other)
13857       SrcVT = InVT;
13858     if (SrcVT != InVT)
13859       return SDValue();
13860     NumDefs++;
13861   }
13862 
13863   // If the vector has just one element defined, it's not worth to fold it into
13864   // a vectorized one.
13865   if (NumDefs < 2)
13866     return SDValue();
13867 
13868   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
13869          && "Should only handle conversion from integer to float.");
13870   assert(SrcVT != MVT::Other && "Cannot determine source type!");
13871 
13872   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
13873 
13874   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
13875     return SDValue();
13876 
13877   // Just because the floating-point vector type is legal does not necessarily
13878   // mean that the corresponding integer vector type is.
13879   if (!isTypeLegal(NVT))
13880     return SDValue();
13881 
13882   SmallVector<SDValue, 8> Opnds;
13883   for (unsigned i = 0; i != NumInScalars; ++i) {
13884     SDValue In = N->getOperand(i);
13885 
13886     if (In.isUndef())
13887       Opnds.push_back(DAG.getUNDEF(SrcVT));
13888     else
13889       Opnds.push_back(In.getOperand(0));
13890   }
13891   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
13892   AddToWorklist(BV.getNode());
13893 
13894   return DAG.getNode(Opcode, DL, VT, BV);
13895 }
13896 
/// Helper for reduceBuildVecToShuffle: build one shuffle that produces, for
/// BUILD_VECTOR node \p N, the lanes whose VectorMask entry names input
/// vector \p LeftIdx or \p LeftIdx + 1 (i.e. the pair VecIn1/VecIn2). Lanes
/// belonging to other inputs are left undef. Returns SDValue() when the
/// input vector types cannot be reconciled with the output type.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  // VecIn2 may be a null SDValue; treat it as having VecIn1's type then.
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  // Offset added to VecIn2's element indices in the combined shuffle mask.
  unsigned Vec2Offset = InVT1.getVectorNumElements();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      if (!TLI.isExtractSubvectorCheap(VT, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        // Shuffle at the wider (input) width; the low VT-sized part is
        // extracted after the shuffle is built below.
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // VecIn1 already matches VT; widen VecIn2 (half the size of VT) by
      // padding it with undef up to the output size.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // Non-positive entries mark undef (-1) or zero-vector (0) lanes (see
    // reduceBuildVecToShuffle); those lanes stay undef in this shuffle.
    if (VectorMask[i] <= 0)
      continue;

    // The operand is an EXTRACT_VECTOR_ELT with a constant index (checked by
    // the caller); that index is the lane to read from the source vector.
    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      // This lane comes from VecIn1.
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      // This lane comes from VecIn2.
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  // If we shuffled at a wider width than VT, extract the low part.
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
14009 
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node (or a tree of shuffles blended
// together, when more than two source vectors are involved).
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is reserved for the zero vector, so real inputs start at index 1.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with a constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();

    SDValue ExtractedFromVec = Op.getOperand(0);

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  // (VecIn always holds the reserved slot 0, so size < 2 means no inputs.)
  if (VecIn.size() < 2)
    return SDValue();

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    // Skip the reserved zero-vector slot: real inputs pair up as
    // (1,2), (3,4), ... The last pair's right-hand vector may be missing.
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      // The zero vector, if used, is the last entry appended above.
      Vec = Shuffles.size() - 1;
    else
      // Input LeftIdx/LeftIdx+1 were merged into shuffle (LeftIdx - 1) / 2.
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    if (CurSize % 2) {
      // Pad an odd level with undef. The write is in-bounds: Shuffles never
      // shrinks, and after the first (even-sized) level CurSize is at most
      // half the initial size.
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      // Blend the pair in place; slot In holds the result for the next level.
      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }

  return Shuffles[0];
}
14172 
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    // Return the constant extract index if Op is an EXTRACT_VECTOR_ELT
    // reading from the same source vector as Op0 does; otherwise return
    // (uint64_t)-1 as a "no match" sentinel.
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    // Offset is the index the first operand extracts from, or -1 on failure.
    // Note the mixed-width comparison below: when Offset is -1, the i == 0
    // iteration compares 0xFFFFFFFF against the 64-bit sentinel and always
    // mismatches, so the loop exits immediately with Offset == -1.
    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      // All operands must extract consecutive indices Offset, Offset+1, ...
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    // Extracting from index 0 of a same-typed vector is the identity.
    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  // Try the more specialized BUILD_VECTOR folds, in order.
  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}
14220 
14221 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
14222   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14223   EVT OpVT = N->getOperand(0).getValueType();
14224 
14225   // If the operands are legal vectors, leave them alone.
14226   if (TLI.isTypeLegal(OpVT))
14227     return SDValue();
14228 
14229   SDLoc DL(N);
14230   EVT VT = N->getValueType(0);
14231   SmallVector<SDValue, 8> Ops;
14232 
14233   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
14234   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14235 
14236   // Keep track of what we encounter.
14237   bool AnyInteger = false;
14238   bool AnyFP = false;
14239   for (const SDValue &Op : N->ops()) {
14240     if (ISD::BITCAST == Op.getOpcode() &&
14241         !Op.getOperand(0).getValueType().isVector())
14242       Ops.push_back(Op.getOperand(0));
14243     else if (ISD::UNDEF == Op.getOpcode())
14244       Ops.push_back(ScalarUndef);
14245     else
14246       return SDValue();
14247 
14248     // Note whether we encounter an integer or floating point scalar.
14249     // If it's neither, bail out, it could be something weird like x86mmx.
14250     EVT LastOpVT = Ops.back().getValueType();
14251     if (LastOpVT.isFloatingPoint())
14252       AnyFP = true;
14253     else if (LastOpVT.isInteger())
14254       AnyInteger = true;
14255     else
14256       return SDValue();
14257   }
14258 
14259   // If any of the operands is a floating point scalar bitcast to a vector,
14260   // use floating point types throughout, and bitcast everything.
14261   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
14262   if (AnyFP) {
14263     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
14264     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14265     if (AnyInteger) {
14266       for (SDValue &Op : Ops) {
14267         if (Op.getValueType() == SVT)
14268           continue;
14269         if (Op.isUndef())
14270           Op = ScalarUndef;
14271         else
14272           Op = DAG.getBitcast(SVT, Op);
14273       }
14274     }
14275   }
14276 
14277   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
14278                                VT.getSizeInBits() / SVT.getSizeInBits());
14279   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
14280 }
14281 
14282 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
14283 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
14284 // most two distinct vectors the same size as the result, attempt to turn this
14285 // into a legal shuffle.
14286 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
14287   EVT VT = N->getValueType(0);
14288   EVT OpVT = N->getOperand(0).getValueType();
14289   int NumElts = VT.getVectorNumElements();
14290   int NumOpElts = OpVT.getVectorNumElements();
14291 
14292   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
14293   SmallVector<int, 8> Mask;
14294 
14295   for (SDValue Op : N->ops()) {
14296     // Peek through any bitcast.
14297     while (Op.getOpcode() == ISD::BITCAST)
14298       Op = Op.getOperand(0);
14299 
14300     // UNDEF nodes convert to UNDEF shuffle mask values.
14301     if (Op.isUndef()) {
14302       Mask.append((unsigned)NumOpElts, -1);
14303       continue;
14304     }
14305 
14306     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14307       return SDValue();
14308 
14309     // What vector are we extracting the subvector from and at what index?
14310     SDValue ExtVec = Op.getOperand(0);
14311 
14312     // We want the EVT of the original extraction to correctly scale the
14313     // extraction index.
14314     EVT ExtVT = ExtVec.getValueType();
14315 
14316     // Peek through any bitcast.
14317     while (ExtVec.getOpcode() == ISD::BITCAST)
14318       ExtVec = ExtVec.getOperand(0);
14319 
14320     // UNDEF nodes convert to UNDEF shuffle mask values.
14321     if (ExtVec.isUndef()) {
14322       Mask.append((unsigned)NumOpElts, -1);
14323       continue;
14324     }
14325 
14326     if (!isa<ConstantSDNode>(Op.getOperand(1)))
14327       return SDValue();
14328     int ExtIdx = Op.getConstantOperandVal(1);
14329 
14330     // Ensure that we are extracting a subvector from a vector the same
14331     // size as the result.
14332     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
14333       return SDValue();
14334 
14335     // Scale the subvector index to account for any bitcast.
14336     int NumExtElts = ExtVT.getVectorNumElements();
14337     if (0 == (NumExtElts % NumElts))
14338       ExtIdx /= (NumExtElts / NumElts);
14339     else if (0 == (NumElts % NumExtElts))
14340       ExtIdx *= (NumElts / NumExtElts);
14341     else
14342       return SDValue();
14343 
14344     // At most we can reference 2 inputs in the final shuffle.
14345     if (SV0.isUndef() || SV0 == ExtVec) {
14346       SV0 = ExtVec;
14347       for (int i = 0; i != NumOpElts; ++i)
14348         Mask.push_back(i + ExtIdx);
14349     } else if (SV1.isUndef() || SV1 == ExtVec) {
14350       SV1 = ExtVec;
14351       for (int i = 0; i != NumOpElts; ++i)
14352         Mask.push_back(i + ExtIdx + NumElts);
14353     } else {
14354       return SDValue();
14355     }
14356   }
14357 
14358   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
14359     return SDValue();
14360 
14361   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
14362                               DAG.getBitcast(VT, SV1), Mask);
14363 }
14364 
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0)->getValueType(0).isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      // Only integer or floating-point scalars are handled.
      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the result would not really be a vector.
      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0)->getValueType(0);
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // An undef operand expands to that many undef scalars.
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          // Integer scalars may be wider than MinVT; truncate them all to it.
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      // Every non-undef operand must extract from the same vector.
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    // Operand i must read part i of the source, i.e. start at i * PartNumElem.
    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  // All parts lined up: the concat is a no-op, forward the source vector.
  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
14518 
14519 /// If we are extracting a subvector produced by a wide binary operator with at
14520 /// at least one operand that was the result of a vector concatenation, then try
14521 /// to use the narrow vector operands directly to avoid the concatenation and
14522 /// extraction.
14523 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
14524   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
14525   // some of these bailouts with other transforms.
14526 
14527   // The extract index must be a constant, so we can map it to a concat operand.
14528   auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
14529   if (!ExtractIndex)
14530     return SDValue();
14531 
14532   // Only handle the case where we are doubling and then halving. A larger ratio
14533   // may require more than two narrow binops to replace the wide binop.
14534   EVT VT = Extract->getValueType(0);
14535   unsigned NumElems = VT.getVectorNumElements();
14536   assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
14537          "Extract index is not a multiple of the vector length.");
14538   if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
14539     return SDValue();
14540 
14541   // We are looking for an optionally bitcasted wide vector binary operator
14542   // feeding an extract subvector.
14543   SDValue BinOp = Extract->getOperand(0);
14544   if (BinOp.getOpcode() == ISD::BITCAST)
14545     BinOp = BinOp.getOperand(0);
14546 
14547   // TODO: The motivating case for this transform is an x86 AVX1 target. That
14548   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
14549   // flavors, but no other 256-bit integer support. This could be extended to
14550   // handle any binop, but that may require fixing/adding other folds to avoid
14551   // codegen regressions.
14552   unsigned BOpcode = BinOp.getOpcode();
14553   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
14554     return SDValue();
14555 
14556   // The binop must be a vector type, so we can chop it in half.
14557   EVT WideBVT = BinOp.getValueType();
14558   if (!WideBVT.isVector())
14559     return SDValue();
14560 
14561   // Bail out if the target does not support a narrower version of the binop.
14562   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
14563                                    WideBVT.getVectorNumElements() / 2);
14564   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14565   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
14566     return SDValue();
14567 
14568   // Peek through bitcasts of the binary operator operands if needed.
14569   SDValue LHS = BinOp.getOperand(0);
14570   if (LHS.getOpcode() == ISD::BITCAST)
14571     LHS = LHS.getOperand(0);
14572 
14573   SDValue RHS = BinOp.getOperand(1);
14574   if (RHS.getOpcode() == ISD::BITCAST)
14575     RHS = RHS.getOperand(0);
14576 
14577   // We need at least one concatenation operation of a binop operand to make
14578   // this transform worthwhile. The concat must double the input vector sizes.
14579   // TODO: Should we also handle INSERT_SUBVECTOR patterns?
14580   bool ConcatL =
14581       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
14582   bool ConcatR =
14583       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
14584   if (!ConcatL && !ConcatR)
14585     return SDValue();
14586 
14587   // If one of the binop operands was not the result of a concat, we must
14588   // extract a half-sized operand for our new narrow binop. We can't just reuse
14589   // the original extract index operand because we may have bitcasted.
14590   unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
14591   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
14592   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
14593   SDLoc DL(Extract);
14594 
14595   // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
14596   // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
14597   // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
14598   SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
14599                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
14600                                     BinOp.getOperand(0),
14601                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
14602 
14603   SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
14604                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
14605                                     BinOp.getOperand(1),
14606                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
14607 
14608   SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
14609   return DAG.getBitcast(VT, NarrowBinOp);
14610 }
14611 
14612 /// If we are extracting a subvector from a wide vector load, convert to a
14613 /// narrow load to eliminate the extraction:
14614 /// (extract_subvector (load wide vector)) --> (load narrow vector)
14615 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
14616   // TODO: Add support for big-endian. The offset calculation must be adjusted.
14617   if (DAG.getDataLayout().isBigEndian())
14618     return SDValue();
14619 
14620   // TODO: The one-use check is overly conservative. Check the cost of the
14621   // extract instead or remove that condition entirely.
14622   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
14623   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
14624   if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
14625       !ExtIdx)
14626     return SDValue();
14627 
14628   // The narrow load will be offset from the base address of the old load if
14629   // we are extracting from something besides index 0 (little-endian).
14630   EVT VT = Extract->getValueType(0);
14631   SDLoc DL(Extract);
14632   SDValue BaseAddr = Ld->getOperand(1);
14633   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
14634 
14635   // TODO: Use "BaseIndexOffset" to make this more effective.
14636   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
14637   MachineFunction &MF = DAG.getMachineFunction();
14638   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
14639                                                    VT.getStoreSize());
14640   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
14641   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
14642   return NewLd;
14643 }
14644 
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);

  // Extract from UNDEF is UNDEF.
  if (V.isUndef())
    return DAG.getUNDEF(NVT);

  // Try to narrow (extract_subvector (load ...)) into a smaller load, but
  // only when the target supports a load of the narrow type.
  if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
    if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
      return NarrowLoad;

  // Combine:
  //    (extract_subvec (concat V1, V2, ...), i)
  // Into:
  //    Vi if possible
  // Only operand 0 is checked as 'concat' assumes all inputs of the same
  // type.
  if (V->getOpcode() == ISD::CONCAT_VECTORS &&
      isa<ConstantSDNode>(N->getOperand(1)) &&
      V->getOperand(0).getValueType() == NVT) {
    unsigned Idx = N->getConstantOperandVal(1);
    unsigned NumElems = NVT.getVectorNumElements();
    assert((Idx % NumElems) == 0 &&
           "IDX in concat is not a multiple of the result vector length.");
    return V->getOperand(Idx / NumElems);
  }

  // Skip bitcasting
  if (V->getOpcode() == ISD::BITCAST)
    V = V.getOperand(0);

  if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only simple case where vector being inserted and vector
    // being extracted are of same size.
    EVT SmallVT = V->getOperand(1).getValueType();
    if (!NVT.bitsEq(SmallVT))
      // NOTE(review): this bails out of the entire visit, which also skips
      // the narrowExtractedVectorBinOp combine below — confirm intended.
      return SDValue();

    // Only handle cases where both indexes are constants.
    ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
    ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));

    if (InsIdx && ExtIdx) {
      // Combine:
      //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
      // Into:
      //    indices are equal or bit offsets are equal => V1
      //    otherwise => (extract_subvec V1, ExtIdx)
      if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
          ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
        return DAG.getBitcast(NVT, V->getOperand(1));
      // Bitcast the base vector back to the original (pre-bitcast) operand
      // type, since we may have peeked through a BITCAST above.
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
          DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
          N->getOperand(1));
    }
  }

  // Fold an extract of a bitwise-logic binop of concatenated vectors into a
  // narrower binop (see narrowExtractedVectorBinOp).
  if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
    return NarrowBOp;

  return SDValue();
}
14709 
14710 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
14711                                                  SDValue V, SelectionDAG &DAG) {
14712   SDLoc DL(V);
14713   EVT VT = V.getValueType();
14714 
14715   switch (V.getOpcode()) {
14716   default:
14717     return V;
14718 
14719   case ISD::CONCAT_VECTORS: {
14720     EVT OpVT = V->getOperand(0).getValueType();
14721     int OpSize = OpVT.getVectorNumElements();
14722     SmallBitVector OpUsedElements(OpSize, false);
14723     bool FoundSimplification = false;
14724     SmallVector<SDValue, 4> NewOps;
14725     NewOps.reserve(V->getNumOperands());
14726     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
14727       SDValue Op = V->getOperand(i);
14728       bool OpUsed = false;
14729       for (int j = 0; j < OpSize; ++j)
14730         if (UsedElements[i * OpSize + j]) {
14731           OpUsedElements[j] = true;
14732           OpUsed = true;
14733         }
14734       NewOps.push_back(
14735           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
14736                  : DAG.getUNDEF(OpVT));
14737       FoundSimplification |= Op == NewOps.back();
14738       OpUsedElements.reset();
14739     }
14740     if (FoundSimplification)
14741       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
14742     return V;
14743   }
14744 
14745   case ISD::INSERT_SUBVECTOR: {
14746     SDValue BaseV = V->getOperand(0);
14747     SDValue SubV = V->getOperand(1);
14748     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
14749     if (!IdxN)
14750       return V;
14751 
14752     int SubSize = SubV.getValueType().getVectorNumElements();
14753     int Idx = IdxN->getZExtValue();
14754     bool SubVectorUsed = false;
14755     SmallBitVector SubUsedElements(SubSize, false);
14756     for (int i = 0; i < SubSize; ++i)
14757       if (UsedElements[i + Idx]) {
14758         SubVectorUsed = true;
14759         SubUsedElements[i] = true;
14760         UsedElements[i + Idx] = false;
14761       }
14762 
14763     // Now recurse on both the base and sub vectors.
14764     SDValue SimplifiedSubV =
14765         SubVectorUsed
14766             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
14767             : DAG.getUNDEF(SubV.getValueType());
14768     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
14769     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
14770       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
14771                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
14772     return V;
14773   }
14774   }
14775 }
14776 
14777 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
14778                                        SDValue N1, SelectionDAG &DAG) {
14779   EVT VT = SVN->getValueType(0);
14780   int NumElts = VT.getVectorNumElements();
14781   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
14782   for (int M : SVN->getMask())
14783     if (M >= 0 && M < NumElts)
14784       N0UsedElements[M] = true;
14785     else if (M >= NumElts)
14786       N1UsedElements[M - NumElts] = true;
14787 
14788   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
14789   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
14790   if (S0 == N0 && S1 == N1)
14791     return SDValue();
14792 
14793   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
14794 }
14795 
14796 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
14797 // or turn a shuffle of a single concat into simpler shuffle then concat.
14798 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
14799   EVT VT = N->getValueType(0);
14800   unsigned NumElts = VT.getVectorNumElements();
14801 
14802   SDValue N0 = N->getOperand(0);
14803   SDValue N1 = N->getOperand(1);
14804   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
14805 
14806   SmallVector<SDValue, 4> Ops;
14807   EVT ConcatVT = N0.getOperand(0).getValueType();
14808   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
14809   unsigned NumConcats = NumElts / NumElemsPerConcat;
14810 
14811   // Special case: shuffle(concat(A,B)) can be more efficiently represented
14812   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
14813   // half vector elements.
14814   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
14815       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
14816                   SVN->getMask().end(), [](int i) { return i == -1; })) {
14817     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
14818                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
14819     N1 = DAG.getUNDEF(ConcatVT);
14820     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
14821   }
14822 
14823   // Look at every vector that's inserted. We're looking for exact
14824   // subvector-sized copies from a concatenated vector
14825   for (unsigned I = 0; I != NumConcats; ++I) {
14826     // Make sure we're dealing with a copy.
14827     unsigned Begin = I * NumElemsPerConcat;
14828     bool AllUndef = true, NoUndef = true;
14829     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
14830       if (SVN->getMaskElt(J) >= 0)
14831         AllUndef = false;
14832       else
14833         NoUndef = false;
14834     }
14835 
14836     if (NoUndef) {
14837       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
14838         return SDValue();
14839 
14840       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
14841         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
14842           return SDValue();
14843 
14844       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
14845       if (FirstElt < N0.getNumOperands())
14846         Ops.push_back(N0.getOperand(FirstElt));
14847       else
14848         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
14849 
14850     } else if (AllUndef) {
14851       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
14852     } else { // Mixed with general masks and undefs, can't do optimization.
14853       return SDValue();
14854     }
14855   }
14856 
14857   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
14858 }
14859 
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load.  A BUILD_VECTOR where each
// element is identical is a splat.  A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  // Heuristic: only fold when this shuffle is the sole user of both inputs.
  if (!N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();
  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
    bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // Gather one scalar operand per result element, reading from whichever
  // source the mask element selects. Undef mask lanes become undef scalars.
  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        // SCALAR_TO_VECTOR only provides element 0; any other lane is left
        // as an undef scalar here.
        if (Idx == 0)
          Op = S.getOperand(0);
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
    // fine, but it's likely to generate low-quality code if the target can't
    // reconstruct an appropriate shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }
  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      // Prefer a zero extension when the target considers it free.
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
14937 
14938 // Match shuffles that can be converted to any_vector_extend_in_reg.
14939 // This is often generated during legalization.
14940 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
14941 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
14942 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
14943                                             SelectionDAG &DAG,
14944                                             const TargetLowering &TLI,
14945                                             bool LegalOperations) {
14946   EVT VT = SVN->getValueType(0);
14947   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
14948 
14949   // TODO Add support for big-endian when we have a test case.
14950   if (!VT.isInteger() || IsBigEndian)
14951     return SDValue();
14952 
14953   unsigned NumElts = VT.getVectorNumElements();
14954   unsigned EltSizeInBits = VT.getScalarSizeInBits();
14955   ArrayRef<int> Mask = SVN->getMask();
14956   SDValue N0 = SVN->getOperand(0);
14957 
14958   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
14959   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
14960     for (unsigned i = 0; i != NumElts; ++i) {
14961       if (Mask[i] < 0)
14962         continue;
14963       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
14964         continue;
14965       return false;
14966     }
14967     return true;
14968   };
14969 
14970   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
14971   // power-of-2 extensions as they are the most likely.
14972   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
14973     if (!isAnyExtend(Scale))
14974       continue;
14975 
14976     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
14977     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
14978     if (!LegalOperations ||
14979         TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
14980       return DAG.getBitcast(VT,
14981                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
14982   }
14983 
14984   return SDValue();
14985 }
14986 
// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
// each source element of a large type into the lowest elements of a smaller
// destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// then be able to remove it.
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  // Peek through any bitcasts to find the node feeding the shuffle.
  SDValue N0 = SVN->getOperand(0);
  while (N0.getOpcode() == ISD::BITCAST)
    N0 = N0.getOperand(0);

  // Only an in-register extend can be cancelled by a truncating shuffle.
  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  ArrayRef<int> Mask = SVN->getMask();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();

  // The extension must widen by a whole-number scale factor.
  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
    return SDValue();
  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;

  // Returns true if every defined mask lane i reads element i*Scale (which
  // confines defined lanes to the low NumElts/Scale positions); undef lanes
  // may appear anywhere:
  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
        continue;
      return false;
    }
    return true;
  };

  // At the moment we just handle the case where we've truncated back to the
  // same size as before the extension.
  // TODO: handle more extension/truncation cases as cases arise.
  if (EltSizeInBits != ExtSrcSizeInBits)
    return SDValue();

  // We can remove *extend_vector_inreg only if the truncation happens at
  // the same scale as the extension.
  if (isTruncate(ExtScale))
    return DAG.getBitcast(VT, N00);

  return SDValue();
}
15049 
// Combine shuffles of splat-shuffles of the form:
// shuffle (shuffle V, undef, splat-mask), undef, M
// If splat-mask contains undef elements, we need to be careful about
// introducing undef's in the folded mask which are not the result of composing
// the masks of the shuffles.
//
// \p UserMask is the mask of the outer (user) shuffle; \p Splat is the inner
// splat-shuffle node. Returns either the existing splat node or a new shuffle
// whose mask composes the two.
static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
                                     ShuffleVectorSDNode *Splat,
                                     SelectionDAG &DAG) {
  ArrayRef<int> SplatMask = Splat->getMask();
  assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");

  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
  // every undef mask element in the splat-shuffle has a corresponding undef
  // element in the user-shuffle's mask or if the composition of mask elements
  // would result in undef.
  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because we
  //   may be exposing the users of the shuffle an undef element at index 1
  //   which was not there before the combine.
  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
  //   In this case the composition of masks yields SplatMask, so it's ok to
  //   simplify to the splat-shuffle.
  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
  //   In this case the composed mask includes all undef elements of SplatMask
  //   and in addition sets element zero to undef. It is safe to simplify to
  //   the splat-shuffle.
  // Note: the lambda's parameters intentionally shadow the enclosing
  // function's UserMask/SplatMask; the call below passes the same values.
  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
                                       ArrayRef<int> SplatMask) {
    for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
      if (UserMask[i] != -1 && SplatMask[i] == -1 &&
          SplatMask[UserMask[i]] != -1)
        return false;
    return true;
  };
  if (CanSimplifyToExistingSplat(UserMask, SplatMask))
    return SDValue(Splat, 0);

  // Create a new shuffle with a mask that is composed of the two shuffles'
  // masks.
  SmallVector<int, 32> NewMask;
  for (int Idx : UserMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);

  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
                              Splat->getOperand(0), Splat->getOperand(1),
                              NewMask);
}
15098 
15099 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
15100   EVT VT = N->getValueType(0);
15101   unsigned NumElts = VT.getVectorNumElements();
15102 
15103   SDValue N0 = N->getOperand(0);
15104   SDValue N1 = N->getOperand(1);
15105 
15106   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
15107 
15108   // Canonicalize shuffle undef, undef -> undef
15109   if (N0.isUndef() && N1.isUndef())
15110     return DAG.getUNDEF(VT);
15111 
15112   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15113 
15114   // Canonicalize shuffle v, v -> v, undef
15115   if (N0 == N1) {
15116     SmallVector<int, 8> NewMask;
15117     for (unsigned i = 0; i != NumElts; ++i) {
15118       int Idx = SVN->getMaskElt(i);
15119       if (Idx >= (int)NumElts) Idx -= NumElts;
15120       NewMask.push_back(Idx);
15121     }
15122     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
15123   }
15124 
15125   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
15126   if (N0.isUndef())
15127     return DAG.getCommutedVectorShuffle(*SVN);
15128 
15129   // Remove references to rhs if it is undef
15130   if (N1.isUndef()) {
15131     bool Changed = false;
15132     SmallVector<int, 8> NewMask;
15133     for (unsigned i = 0; i != NumElts; ++i) {
15134       int Idx = SVN->getMaskElt(i);
15135       if (Idx >= (int)NumElts) {
15136         Idx = -1;
15137         Changed = true;
15138       }
15139       NewMask.push_back(Idx);
15140     }
15141     if (Changed)
15142       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
15143   }
15144 
15145   // A shuffle of a single vector that is a splat can always be folded.
15146   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
15147     if (N1->isUndef() && N0Shuf->isSplat())
15148       return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
15149 
15150   // If it is a splat, check if the argument vector is another splat or a
15151   // build_vector.
15152   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
15153     SDNode *V = N0.getNode();
15154 
15155     // If this is a bit convert that changes the element type of the vector but
15156     // not the number of vector elements, look through it.  Be careful not to
15157     // look though conversions that change things like v4f32 to v2f64.
15158     if (V->getOpcode() == ISD::BITCAST) {
15159       SDValue ConvInput = V->getOperand(0);
15160       if (ConvInput.getValueType().isVector() &&
15161           ConvInput.getValueType().getVectorNumElements() == NumElts)
15162         V = ConvInput.getNode();
15163     }
15164 
15165     if (V->getOpcode() == ISD::BUILD_VECTOR) {
15166       assert(V->getNumOperands() == NumElts &&
15167              "BUILD_VECTOR has wrong number of operands");
15168       SDValue Base;
15169       bool AllSame = true;
15170       for (unsigned i = 0; i != NumElts; ++i) {
15171         if (!V->getOperand(i).isUndef()) {
15172           Base = V->getOperand(i);
15173           break;
15174         }
15175       }
15176       // Splat of <u, u, u, u>, return <u, u, u, u>
15177       if (!Base.getNode())
15178         return N0;
15179       for (unsigned i = 0; i != NumElts; ++i) {
15180         if (V->getOperand(i) != Base) {
15181           AllSame = false;
15182           break;
15183         }
15184       }
15185       // Splat of <x, x, x, x>, return <x, x, x, x>
15186       if (AllSame)
15187         return N0;
15188 
15189       // Canonicalize any other splat as a build_vector.
15190       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
15191       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
15192       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
15193 
15194       // We may have jumped through bitcasts, so the type of the
15195       // BUILD_VECTOR may not match the type of the shuffle.
15196       if (V->getValueType(0) != VT)
15197         NewBV = DAG.getBitcast(VT, NewBV);
15198       return NewBV;
15199     }
15200   }
15201 
15202   // There are various patterns used to build up a vector from smaller vectors,
15203   // subvectors, or elements. Scan chains of these and replace unused insertions
15204   // or components with undef.
15205   if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
15206     return S;
15207 
15208   // Match shuffles that can be converted to any_vector_extend_in_reg.
15209   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
15210     return V;
15211 
15212   // Combine "truncate_vector_in_reg" style shuffles.
15213   if (SDValue V = combineTruncationShuffle(SVN, DAG))
15214     return V;
15215 
15216   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
15217       Level < AfterLegalizeVectorOps &&
15218       (N1.isUndef() ||
15219       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
15220        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
15221     if (SDValue V = partitionShuffleOfConcats(N, DAG))
15222       return V;
15223   }
15224 
15225   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
15226   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
15227   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
15228     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
15229       return Res;
15230 
15231   // If this shuffle only has a single input that is a bitcasted shuffle,
15232   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
15233   // back to their original types.
15234   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
15235       N1.isUndef() && Level < AfterLegalizeVectorOps &&
15236       TLI.isTypeLegal(VT)) {
15237 
15238     // Peek through the bitcast only if there is one user.
15239     SDValue BC0 = N0;
15240     while (BC0.getOpcode() == ISD::BITCAST) {
15241       if (!BC0.hasOneUse())
15242         break;
15243       BC0 = BC0.getOperand(0);
15244     }
15245 
15246     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
15247       if (Scale == 1)
15248         return SmallVector<int, 8>(Mask.begin(), Mask.end());
15249 
15250       SmallVector<int, 8> NewMask;
15251       for (int M : Mask)
15252         for (int s = 0; s != Scale; ++s)
15253           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
15254       return NewMask;
15255     };
15256 
15257     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
15258       EVT SVT = VT.getScalarType();
15259       EVT InnerVT = BC0->getValueType(0);
15260       EVT InnerSVT = InnerVT.getScalarType();
15261 
15262       // Determine which shuffle works with the smaller scalar type.
15263       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
15264       EVT ScaleSVT = ScaleVT.getScalarType();
15265 
15266       if (TLI.isTypeLegal(ScaleVT) &&
15267           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
15268           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
15269 
15270         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
15271         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
15272 
15273         // Scale the shuffle masks to the smaller scalar type.
15274         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
15275         SmallVector<int, 8> InnerMask =
15276             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
15277         SmallVector<int, 8> OuterMask =
15278             ScaleShuffleMask(SVN->getMask(), OuterScale);
15279 
15280         // Merge the shuffle masks.
15281         SmallVector<int, 8> NewMask;
15282         for (int M : OuterMask)
15283           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
15284 
15285         // Test for shuffle mask legality over both commutations.
15286         SDValue SV0 = BC0->getOperand(0);
15287         SDValue SV1 = BC0->getOperand(1);
15288         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
15289         if (!LegalMask) {
15290           std::swap(SV0, SV1);
15291           ShuffleVectorSDNode::commuteMask(NewMask);
15292           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
15293         }
15294 
15295         if (LegalMask) {
15296           SV0 = DAG.getBitcast(ScaleVT, SV0);
15297           SV1 = DAG.getBitcast(ScaleVT, SV1);
15298           return DAG.getBitcast(
15299               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
15300         }
15301       }
15302     }
15303   }
15304 
15305   // Canonicalize shuffles according to rules:
15306   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
15307   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
15308   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
15309   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
15310       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
15311       TLI.isTypeLegal(VT)) {
15312     // The incoming shuffle must be of the same type as the result of the
15313     // current shuffle.
15314     assert(N1->getOperand(0).getValueType() == VT &&
15315            "Shuffle types don't match");
15316 
15317     SDValue SV0 = N1->getOperand(0);
15318     SDValue SV1 = N1->getOperand(1);
15319     bool HasSameOp0 = N0 == SV0;
15320     bool IsSV1Undef = SV1.isUndef();
15321     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
15322       // Commute the operands of this shuffle so that next rule
15323       // will trigger.
15324       return DAG.getCommutedVectorShuffle(*SVN);
15325   }
15326 
15327   // Try to fold according to rules:
15328   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
15329   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
15330   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
15331   // Don't try to fold shuffles with illegal type.
15332   // Only fold if this shuffle is the only user of the other shuffle.
15333   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
15334       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
15335     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
15336 
15337     // Don't try to fold splats; they're likely to simplify somehow, or they
15338     // might be free.
15339     if (OtherSV->isSplat())
15340       return SDValue();
15341 
15342     // The incoming shuffle must be of the same type as the result of the
15343     // current shuffle.
15344     assert(OtherSV->getOperand(0).getValueType() == VT &&
15345            "Shuffle types don't match");
15346 
15347     SDValue SV0, SV1;
15348     SmallVector<int, 4> Mask;
15349     // Compute the combined shuffle mask for a shuffle with SV0 as the first
15350     // operand, and SV1 as the second operand.
15351     for (unsigned i = 0; i != NumElts; ++i) {
15352       int Idx = SVN->getMaskElt(i);
15353       if (Idx < 0) {
15354         // Propagate Undef.
15355         Mask.push_back(Idx);
15356         continue;
15357       }
15358 
15359       SDValue CurrentVec;
15360       if (Idx < (int)NumElts) {
15361         // This shuffle index refers to the inner shuffle N0. Lookup the inner
15362         // shuffle mask to identify which vector is actually referenced.
15363         Idx = OtherSV->getMaskElt(Idx);
15364         if (Idx < 0) {
15365           // Propagate Undef.
15366           Mask.push_back(Idx);
15367           continue;
15368         }
15369 
15370         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
15371                                            : OtherSV->getOperand(1);
15372       } else {
15373         // This shuffle index references an element within N1.
15374         CurrentVec = N1;
15375       }
15376 
15377       // Simple case where 'CurrentVec' is UNDEF.
15378       if (CurrentVec.isUndef()) {
15379         Mask.push_back(-1);
15380         continue;
15381       }
15382 
15383       // Canonicalize the shuffle index. We don't know yet if CurrentVec
15384       // will be the first or second operand of the combined shuffle.
15385       Idx = Idx % NumElts;
15386       if (!SV0.getNode() || SV0 == CurrentVec) {
15387         // Ok. CurrentVec is the left hand side.
15388         // Update the mask accordingly.
15389         SV0 = CurrentVec;
15390         Mask.push_back(Idx);
15391         continue;
15392       }
15393 
15394       // Bail out if we cannot convert the shuffle pair into a single shuffle.
15395       if (SV1.getNode() && SV1 != CurrentVec)
15396         return SDValue();
15397 
15398       // Ok. CurrentVec is the right hand side.
15399       // Update the mask accordingly.
15400       SV1 = CurrentVec;
15401       Mask.push_back(Idx + NumElts);
15402     }
15403 
15404     // Check if all indices in Mask are Undef. In case, propagate Undef.
15405     bool isUndefMask = true;
15406     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
15407       isUndefMask &= Mask[i] < 0;
15408 
15409     if (isUndefMask)
15410       return DAG.getUNDEF(VT);
15411 
15412     if (!SV0.getNode())
15413       SV0 = DAG.getUNDEF(VT);
15414     if (!SV1.getNode())
15415       SV1 = DAG.getUNDEF(VT);
15416 
15417     // Avoid introducing shuffles with illegal mask.
15418     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
15419       ShuffleVectorSDNode::commuteMask(Mask);
15420 
15421       if (!TLI.isShuffleMaskLegal(Mask, VT))
15422         return SDValue();
15423 
15424       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
15425       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
15426       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
15427       std::swap(SV0, SV1);
15428     }
15429 
15430     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
15431     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
15432     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
15433     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
15434   }
15435 
15436   return SDValue();
15437 }
15438 
15439 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
15440   SDValue InVal = N->getOperand(0);
15441   EVT VT = N->getValueType(0);
15442 
15443   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
15444   // with a VECTOR_SHUFFLE.
15445   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15446     SDValue InVec = InVal->getOperand(0);
15447     SDValue EltNo = InVal->getOperand(1);
15448 
15449     // FIXME: We could support implicit truncation if the shuffle can be
15450     // scaled to a smaller vector scalar type.
15451     ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
15452     if (C0 && VT == InVec.getValueType() &&
15453         VT.getScalarType() == InVal.getValueType()) {
15454       SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
15455       int Elt = C0->getZExtValue();
15456       NewMask[0] = Elt;
15457 
15458       if (TLI.isShuffleMaskLegal(NewMask, VT))
15459         return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
15460                                     NewMask);
15461     }
15462   }
15463 
15464   return SDValue();
15465 }
15466 
15467 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
15468   EVT VT = N->getValueType(0);
15469   SDValue N0 = N->getOperand(0);
15470   SDValue N1 = N->getOperand(1);
15471   SDValue N2 = N->getOperand(2);
15472 
15473   // If inserting an UNDEF, just return the original vector.
15474   if (N1.isUndef())
15475     return N0;
15476 
15477   // If this is an insert of an extracted vector into an undef vector, we can
15478   // just use the input to the extract.
15479   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15480       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
15481     return N1.getOperand(0);
15482 
15483   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
15484   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
15485   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
15486   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
15487       N0.getOperand(1).getValueType() == N1.getValueType() &&
15488       N0.getOperand(2) == N2)
15489     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
15490                        N1, N2);
15491 
15492   if (!isa<ConstantSDNode>(N2))
15493     return SDValue();
15494 
15495   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
15496 
15497   // Canonicalize insert_subvector dag nodes.
15498   // Example:
15499   // (insert_subvector (insert_subvector A, Idx0), Idx1)
15500   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
15501   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
15502       N1.getValueType() == N0.getOperand(1).getValueType() &&
15503       isa<ConstantSDNode>(N0.getOperand(2))) {
15504     unsigned OtherIdx = N0.getConstantOperandVal(2);
15505     if (InsIdx < OtherIdx) {
15506       // Swap nodes.
15507       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
15508                                   N0.getOperand(0), N1, N2);
15509       AddToWorklist(NewOp.getNode());
15510       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
15511                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
15512     }
15513   }
15514 
15515   // If the input vector is a concatenation, and the insert replaces
15516   // one of the pieces, we can optimize into a single concat_vectors.
15517   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
15518       N0.getOperand(0).getValueType() == N1.getValueType()) {
15519     unsigned Factor = N1.getValueType().getVectorNumElements();
15520 
15521     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
15522     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
15523 
15524     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
15525   }
15526 
15527   return SDValue();
15528 }
15529 
15530 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
15531   SDValue N0 = N->getOperand(0);
15532 
15533   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
15534   if (N0->getOpcode() == ISD::FP16_TO_FP)
15535     return N0->getOperand(0);
15536 
15537   return SDValue();
15538 }
15539 
15540 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
15541   SDValue N0 = N->getOperand(0);
15542 
15543   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
15544   if (N0->getOpcode() == ISD::AND) {
15545     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
15546     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
15547       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
15548                          N0.getOperand(0));
15549     }
15550   }
15551 
15552   return SDValue();
15553 }
15554 
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (N->getOpcode() != ISD::AND)
    return SDValue();

  // Look through a bitcast on the mask operand; the constant vector may have
  // been built with a different (bit-identical) element type.
  if (RHS.getOpcode() == ISD::BITCAST)
    RHS = RHS.getOperand(0);

  // Only a constant BUILD_VECTOR mask can be turned into a shuffle mask.
  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  // 'Split' is how many sub-elements each mask element is divided into;
  // Split == 1 tests the mask at the original element granularity.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    // Shuffle indices: 'i' keeps sub-element i of LHS, 'i + NumSubElts'
    // takes the corresponding element of the zero vector (clears it).
    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      // An undef mask element can map to anything; leave it undef (-1).
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // On big-endian targets sub-element 0 holds the most significant bits.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits.lshrInPlace(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      // Each sub-element must be entirely ones (keep) or zeros (clear);
      // any mixed pattern cannot be expressed as a shuffle.
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try the coarsest granularity first and progressively subdivide.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
15649 
15650 /// Visit a binary vector operation, like ADD.
15651 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
15652   assert(N->getValueType(0).isVector() &&
15653          "SimplifyVBinOp only works on vectors!");
15654 
15655   SDValue LHS = N->getOperand(0);
15656   SDValue RHS = N->getOperand(1);
15657   SDValue Ops[] = {LHS, RHS};
15658 
15659   // See if we can constant fold the vector operation.
15660   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
15661           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
15662     return Fold;
15663 
15664   // Try to convert a constant mask AND into a shuffle clear mask.
15665   if (SDValue Shuffle = XformToShuffleWithZero(N))
15666     return Shuffle;
15667 
15668   // Type legalization might introduce new shuffles in the DAG.
15669   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
15670   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
15671   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
15672       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
15673       LHS.getOperand(1).isUndef() &&
15674       RHS.getOperand(1).isUndef()) {
15675     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
15676     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
15677 
15678     if (SVN0->getMask().equals(SVN1->getMask())) {
15679       EVT VT = N->getValueType(0);
15680       SDValue UndefVector = LHS.getOperand(1);
15681       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
15682                                      LHS.getOperand(0), RHS.getOperand(0),
15683                                      N->getFlags());
15684       AddUsersToWorklist(N);
15685       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
15686                                   SVN0->getMask());
15687     }
15688   }
15689 
15690   return SDValue();
15691 }
15692 
15693 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
15694                                     SDValue N2) {
15695   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
15696 
15697   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
15698                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
15699 
15700   // If we got a simplified select_cc node back from SimplifySelectCC, then
15701   // break it down into a new SETCC node, and a new SELECT node, and then return
15702   // the SELECT node, since we were called with a SELECT node.
15703   if (SCC.getNode()) {
15704     // Check to see if we got a select_cc back (to turn into setcc/select).
15705     // Otherwise, just return whatever node we got back, like fabs.
15706     if (SCC.getOpcode() == ISD::SELECT_CC) {
15707       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
15708                                   N0.getValueType(),
15709                                   SCC.getOperand(0), SCC.getOperand(1),
15710                                   SCC.getOperand(4));
15711       AddToWorklist(SETCC.getNode());
15712       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
15713                            SCC.getOperand(2), SCC.getOperand(3));
15714     }
15715 
15716     return SCC;
15717   }
15718   return SDValue();
15719 }
15720 
15721 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
15722 /// being selected between, see if we can simplify the select.  Callers of this
15723 /// should assume that TheSelect is deleted if this returns true.  As such, they
15724 /// should return the appropriate thing (e.g. the node) back to the top-level of
15725 /// the DAG combiner loop to avoid it being looked at.
15726 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
15727                                     SDValue RHS) {
15728 
15729   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
15730   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
15731   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
15732     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
15733       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
15734       SDValue Sqrt = RHS;
15735       ISD::CondCode CC;
15736       SDValue CmpLHS;
15737       const ConstantFPSDNode *Zero = nullptr;
15738 
15739       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
15740         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
15741         CmpLHS = TheSelect->getOperand(0);
15742         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
15743       } else {
15744         // SELECT or VSELECT
15745         SDValue Cmp = TheSelect->getOperand(0);
15746         if (Cmp.getOpcode() == ISD::SETCC) {
15747           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
15748           CmpLHS = Cmp.getOperand(0);
15749           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
15750         }
15751       }
15752       if (Zero && Zero->isZero() &&
15753           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
15754           CC == ISD::SETULT || CC == ISD::SETLT)) {
15755         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
15756         CombineTo(TheSelect, Sqrt);
15757         return true;
15758       }
15759     }
15760   }
15761   // Cannot simplify select with vector condition
15762   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
15763 
15764   // If this is a select from two identical things, try to pull the operation
15765   // through the select.
15766   if (LHS.getOpcode() != RHS.getOpcode() ||
15767       !LHS.hasOneUse() || !RHS.hasOneUse())
15768     return false;
15769 
15770   // If this is a load and the token chain is identical, replace the select
15771   // of two loads with a load through a select of the address to load from.
15772   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
15773   // constants have been dropped into the constant pool.
15774   if (LHS.getOpcode() == ISD::LOAD) {
15775     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
15776     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
15777 
15778     // Token chains must be identical.
15779     if (LHS.getOperand(0) != RHS.getOperand(0) ||
15780         // Do not let this transformation reduce the number of volatile loads.
15781         LLD->isVolatile() || RLD->isVolatile() ||
15782         // FIXME: If either is a pre/post inc/dec load,
15783         // we'd need to split out the address adjustment.
15784         LLD->isIndexed() || RLD->isIndexed() ||
15785         // If this is an EXTLOAD, the VT's must match.
15786         LLD->getMemoryVT() != RLD->getMemoryVT() ||
15787         // If this is an EXTLOAD, the kind of extension must match.
15788         (LLD->getExtensionType() != RLD->getExtensionType() &&
15789          // The only exception is if one of the extensions is anyext.
15790          LLD->getExtensionType() != ISD::EXTLOAD &&
15791          RLD->getExtensionType() != ISD::EXTLOAD) ||
15792         // FIXME: this discards src value information.  This is
15793         // over-conservative. It would be beneficial to be able to remember
15794         // both potential memory locations.  Since we are discarding
15795         // src value info, don't do the transformation if the memory
15796         // locations are not in the default address space.
15797         LLD->getPointerInfo().getAddrSpace() != 0 ||
15798         RLD->getPointerInfo().getAddrSpace() != 0 ||
15799         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
15800                                       LLD->getBasePtr().getValueType()))
15801       return false;
15802 
15803     // Check that the select condition doesn't reach either load.  If so,
15804     // folding this will induce a cycle into the DAG.  If not, this is safe to
15805     // xform, so create a select of the addresses.
15806     SDValue Addr;
15807     if (TheSelect->getOpcode() == ISD::SELECT) {
15808       SDNode *CondNode = TheSelect->getOperand(0).getNode();
15809       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
15810           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
15811         return false;
15812       // The loads must not depend on one another.
15813       if (LLD->isPredecessorOf(RLD) ||
15814           RLD->isPredecessorOf(LLD))
15815         return false;
15816       Addr = DAG.getSelect(SDLoc(TheSelect),
15817                            LLD->getBasePtr().getValueType(),
15818                            TheSelect->getOperand(0), LLD->getBasePtr(),
15819                            RLD->getBasePtr());
15820     } else {  // Otherwise SELECT_CC
15821       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
15822       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
15823 
15824       if ((LLD->hasAnyUseOfValue(1) &&
15825            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
15826           (RLD->hasAnyUseOfValue(1) &&
15827            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
15828         return false;
15829 
15830       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
15831                          LLD->getBasePtr().getValueType(),
15832                          TheSelect->getOperand(0),
15833                          TheSelect->getOperand(1),
15834                          LLD->getBasePtr(), RLD->getBasePtr(),
15835                          TheSelect->getOperand(4));
15836     }
15837 
15838     SDValue Load;
15839     // It is safe to replace the two loads if they have different alignments,
15840     // but the new load must be the minimum (most restrictive) alignment of the
15841     // inputs.
15842     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
15843     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
15844     if (!RLD->isInvariant())
15845       MMOFlags &= ~MachineMemOperand::MOInvariant;
15846     if (!RLD->isDereferenceable())
15847       MMOFlags &= ~MachineMemOperand::MODereferenceable;
15848     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
15849       // FIXME: Discards pointer and AA info.
15850       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
15851                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
15852                          MMOFlags);
15853     } else {
15854       // FIXME: Discards pointer and AA info.
15855       Load = DAG.getExtLoad(
15856           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
15857                                                   : LLD->getExtensionType(),
15858           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
15859           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
15860     }
15861 
15862     // Users of the select now use the result of the load.
15863     CombineTo(TheSelect, Load);
15864 
15865     // Users of the old loads now use the new load's chain.  We know the
15866     // old-load value is dead now.
15867     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
15868     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
15869     return true;
15870   }
15871 
15872   return false;
15873 }
15874 
15875 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
15876 /// bitwise 'and'.
15877 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
15878                                             SDValue N1, SDValue N2, SDValue N3,
15879                                             ISD::CondCode CC) {
15880   // If this is a select where the false operand is zero and the compare is a
15881   // check of the sign bit, see if we can perform the "gzip trick":
15882   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
15883   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
15884   EVT XType = N0.getValueType();
15885   EVT AType = N2.getValueType();
15886   if (!isNullConstant(N3) || !XType.bitsGE(AType))
15887     return SDValue();
15888 
15889   // If the comparison is testing for a positive value, we have to invert
15890   // the sign bit mask, so only do that transform if the target has a bitwise
15891   // 'and not' instruction (the invert is free).
15892   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
15893     // (X > -1) ? A : 0
15894     // (X >  0) ? X : 0 <-- This is canonical signed max.
15895     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
15896       return SDValue();
15897   } else if (CC == ISD::SETLT) {
15898     // (X <  0) ? A : 0
15899     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
15900     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
15901       return SDValue();
15902   } else {
15903     return SDValue();
15904   }
15905 
15906   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
15907   // constant.
15908   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
15909   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
15910   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
15911     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
15912     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
15913     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
15914     AddToWorklist(Shift.getNode());
15915 
15916     if (XType.bitsGT(AType)) {
15917       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
15918       AddToWorklist(Shift.getNode());
15919     }
15920 
15921     if (CC == ISD::SETGT)
15922       Shift = DAG.getNOT(DL, Shift, AType);
15923 
15924     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
15925   }
15926 
15927   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
15928   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
15929   AddToWorklist(Shift.getNode());
15930 
15931   if (XType.bitsGT(AType)) {
15932     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
15933     AddToWorklist(Shift.getNode());
15934   }
15935 
15936   if (CC == ISD::SETGT)
15937     Shift = DAG.getNOT(DL, Shift, AType);
15938 
15939   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
15940 }
15941 
15942 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
15943 /// where 'cond' is the comparison specified by CC.
15944 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
15945                                       SDValue N2, SDValue N3, ISD::CondCode CC,
15946                                       bool NotExtCompare) {
15947   // (x ? y : y) -> y.
15948   if (N2 == N3) return N2;
15949 
15950   EVT VT = N2.getValueType();
15951   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
15952   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
15953 
15954   // Determine if the condition we're dealing with is constant
15955   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
15956                               N0, N1, CC, DL, false);
15957   if (SCC.getNode()) AddToWorklist(SCC.getNode());
15958 
15959   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
15960     // fold select_cc true, x, y -> x
15961     // fold select_cc false, x, y -> y
15962     return !SCCC->isNullValue() ? N2 : N3;
15963   }
15964 
15965   // Check to see if we can simplify the select into an fabs node
15966   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
15967     // Allow either -0.0 or 0.0
15968     if (CFP->isZero()) {
15969       // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
15970       if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
15971           N0 == N2 && N3.getOpcode() == ISD::FNEG &&
15972           N2 == N3.getOperand(0))
15973         return DAG.getNode(ISD::FABS, DL, VT, N0);
15974 
15975       // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
15976       if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
15977           N0 == N3 && N2.getOpcode() == ISD::FNEG &&
15978           N2.getOperand(0) == N3)
15979         return DAG.getNode(ISD::FABS, DL, VT, N3);
15980     }
15981   }
15982 
15983   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
15984   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
15985   // in it.  This is a win when the constant is not otherwise available because
15986   // it replaces two constant pool loads with one.  We only do this if the FP
15987   // type is known to be legal, because if it isn't, then we are before legalize
15988   // types an we want the other legalization to happen first (e.g. to avoid
15989   // messing with soft float) and if the ConstantFP is not legal, because if
15990   // it is legal, we may not need to store the FP constant in a constant pool.
15991   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
15992     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
15993       if (TLI.isTypeLegal(N2.getValueType()) &&
15994           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
15995                TargetLowering::Legal &&
15996            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
15997            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
15998           // If both constants have multiple uses, then we won't need to do an
15999           // extra load, they are likely around in registers for other users.
16000           (TV->hasOneUse() || FV->hasOneUse())) {
16001         Constant *Elts[] = {
16002           const_cast<ConstantFP*>(FV->getConstantFPValue()),
16003           const_cast<ConstantFP*>(TV->getConstantFPValue())
16004         };
16005         Type *FPTy = Elts[0]->getType();
16006         const DataLayout &TD = DAG.getDataLayout();
16007 
16008         // Create a ConstantArray of the two constants.
16009         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
16010         SDValue CPIdx =
16011             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
16012                                 TD.getPrefTypeAlignment(FPTy));
16013         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
16014 
16015         // Get the offsets to the 0 and 1 element of the array so that we can
16016         // select between them.
16017         SDValue Zero = DAG.getIntPtrConstant(0, DL);
16018         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
16019         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
16020 
16021         SDValue Cond = DAG.getSetCC(DL,
16022                                     getSetCCResultType(N0.getValueType()),
16023                                     N0, N1, CC);
16024         AddToWorklist(Cond.getNode());
16025         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
16026                                           Cond, One, Zero);
16027         AddToWorklist(CstOffset.getNode());
16028         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
16029                             CstOffset);
16030         AddToWorklist(CPIdx.getNode());
16031         return DAG.getLoad(
16032             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
16033             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
16034             Alignment);
16035       }
16036     }
16037 
16038   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
16039     return V;
16040 
16041   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
16042   // where y is has a single bit set.
16043   // A plaintext description would be, we can turn the SELECT_CC into an AND
16044   // when the condition can be materialized as an all-ones register.  Any
16045   // single bit-test can be materialized as an all-ones register with
16046   // shift-left and shift-right-arith.
16047   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
16048       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
16049     SDValue AndLHS = N0->getOperand(0);
16050     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16051     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
16052       // Shift the tested bit over the sign bit.
16053       const APInt &AndMask = ConstAndRHS->getAPIntValue();
16054       SDValue ShlAmt =
16055         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
16056                         getShiftAmountTy(AndLHS.getValueType()));
16057       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
16058 
16059       // Now arithmetic right shift it all the way over, so the result is either
16060       // all-ones, or zero.
16061       SDValue ShrAmt =
16062         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
16063                         getShiftAmountTy(Shl.getValueType()));
16064       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
16065 
16066       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
16067     }
16068   }
16069 
16070   // fold select C, 16, 0 -> shl C, 4
16071   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
16072       TLI.getBooleanContents(N0.getValueType()) ==
16073           TargetLowering::ZeroOrOneBooleanContent) {
16074 
16075     // If the caller doesn't want us to simplify this into a zext of a compare,
16076     // don't do it.
16077     if (NotExtCompare && N2C->isOne())
16078       return SDValue();
16079 
16080     // Get a SetCC of the condition
16081     // NOTE: Don't create a SETCC if it's not legal on this target.
16082     if (!LegalOperations ||
16083         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
16084       SDValue Temp, SCC;
16085       // cast from setcc result type to select result type
16086       if (LegalTypes) {
16087         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
16088                             N0, N1, CC);
16089         if (N2.getValueType().bitsLT(SCC.getValueType()))
16090           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
16091                                         N2.getValueType());
16092         else
16093           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
16094                              N2.getValueType(), SCC);
16095       } else {
16096         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
16097         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
16098                            N2.getValueType(), SCC);
16099       }
16100 
16101       AddToWorklist(SCC.getNode());
16102       AddToWorklist(Temp.getNode());
16103 
16104       if (N2C->isOne())
16105         return Temp;
16106 
16107       // shl setcc result by log2 n2c
16108       return DAG.getNode(
16109           ISD::SHL, DL, N2.getValueType(), Temp,
16110           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
16111                           getShiftAmountTy(Temp.getValueType())));
16112     }
16113   }
16114 
16115   // Check to see if this is an integer abs.
16116   // select_cc setg[te] X,  0,  X, -X ->
16117   // select_cc setgt    X, -1,  X, -X ->
16118   // select_cc setl[te] X,  0, -X,  X ->
16119   // select_cc setlt    X,  1, -X,  X ->
16120   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
16121   if (N1C) {
16122     ConstantSDNode *SubC = nullptr;
16123     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
16124          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
16125         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
16126       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
16127     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
16128               (N1C->isOne() && CC == ISD::SETLT)) &&
16129              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
16130       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
16131 
16132     EVT XType = N0.getValueType();
16133     if (SubC && SubC->isNullValue() && XType.isInteger()) {
16134       SDLoc DL(N0);
16135       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
16136                                   N0,
16137                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
16138                                          getShiftAmountTy(N0.getValueType())));
16139       SDValue Add = DAG.getNode(ISD::ADD, DL,
16140                                 XType, N0, Shift);
16141       AddToWorklist(Shift.getNode());
16142       AddToWorklist(Add.getNode());
16143       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
16144     }
16145   }
16146 
16147   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
16148   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
16149   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
16150   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
16151   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
16152   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
16153   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
16154   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
16155   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
16156     SDValue ValueOnZero = N2;
16157     SDValue Count = N3;
16158     // If the condition is NE instead of E, swap the operands.
16159     if (CC == ISD::SETNE)
16160       std::swap(ValueOnZero, Count);
16161     // Check if the value on zero is a constant equal to the bits in the type.
16162     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
16163       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
16164         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
16165         // legal, combine to just cttz.
16166         if ((Count.getOpcode() == ISD::CTTZ ||
16167              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
16168             N0 == Count.getOperand(0) &&
16169             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
16170           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
16171         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
16172         // legal, combine to just ctlz.
16173         if ((Count.getOpcode() == ISD::CTLZ ||
16174              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
16175             N0 == Count.getOperand(0) &&
16176             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
16177           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
16178       }
16179     }
16180   }
16181 
16182   return SDValue();
16183 }
16184 
16185 /// This is a stub for TargetLowering::SimplifySetCC.
16186 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
16187                                    ISD::CondCode Cond, const SDLoc &DL,
16188                                    bool foldBooleans) {
16189   TargetLowering::DAGCombinerInfo
16190     DagCombineInfo(DAG, Level, false, this);
16191   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
16192 }
16193 
16194 /// Given an ISD::SDIV node expressing a divide by constant, return
16195 /// a DAG expression to select that will generate the same value by multiplying
16196 /// by a magic number.
16197 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
16198 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
16199   // when optimising for minimum size, we don't want to expand a div to a mul
16200   // and a shift.
16201   if (DAG.getMachineFunction().getFunction()->optForMinSize())
16202     return SDValue();
16203 
16204   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16205   if (!C)
16206     return SDValue();
16207 
16208   // Avoid division by zero.
16209   if (C->isNullValue())
16210     return SDValue();
16211 
16212   std::vector<SDNode*> Built;
16213   SDValue S =
16214       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
16215 
16216   for (SDNode *N : Built)
16217     AddToWorklist(N);
16218   return S;
16219 }
16220 
16221 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
16222 /// DAG expression that will generate the same value by right shifting.
16223 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
16224   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16225   if (!C)
16226     return SDValue();
16227 
16228   // Avoid division by zero.
16229   if (C->isNullValue())
16230     return SDValue();
16231 
16232   std::vector<SDNode *> Built;
16233   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
16234 
16235   for (SDNode *N : Built)
16236     AddToWorklist(N);
16237   return S;
16238 }
16239 
16240 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
16241 /// expression that will generate the same value by multiplying by a magic
16242 /// number.
16243 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
16244 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
16245   // when optimising for minimum size, we don't want to expand a div to a mul
16246   // and a shift.
16247   if (DAG.getMachineFunction().getFunction()->optForMinSize())
16248     return SDValue();
16249 
16250   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16251   if (!C)
16252     return SDValue();
16253 
16254   // Avoid division by zero.
16255   if (C->isNullValue())
16256     return SDValue();
16257 
16258   std::vector<SDNode*> Built;
16259   SDValue S =
16260       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
16261 
16262   for (SDNode *N : Built)
16263     AddToWorklist(N);
16264   return S;
16265 }
16266 
16267 /// Determines the LogBase2 value for a non-null input value using the
16268 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
16269 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
16270   EVT VT = V.getValueType();
16271   unsigned EltBits = VT.getScalarSizeInBits();
16272   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
16273   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
16274   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
16275   return LogBase2;
16276 }
16277 
16278 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16279 /// For the reciprocal, we need to find the zero of the function:
16280 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
16281 ///     =>
16282 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
16283 ///     does not require additional intermediate precision]
16284 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
16285   if (Level >= AfterLegalizeDAG)
16286     return SDValue();
16287 
16288   // TODO: Handle half and/or extended types?
16289   EVT VT = Op.getValueType();
16290   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
16291     return SDValue();
16292 
16293   // If estimates are explicitly disabled for this function, we're done.
16294   MachineFunction &MF = DAG.getMachineFunction();
16295   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
16296   if (Enabled == TLI.ReciprocalEstimate::Disabled)
16297     return SDValue();
16298 
16299   // Estimates may be explicitly enabled for this type with a custom number of
16300   // refinement steps.
16301   int Iterations = TLI.getDivRefinementSteps(VT, MF);
16302   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
16303     AddToWorklist(Est.getNode());
16304 
16305     if (Iterations) {
16306       EVT VT = Op.getValueType();
16307       SDLoc DL(Op);
16308       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
16309 
16310       // Newton iterations: Est = Est + Est (1 - Arg * Est)
16311       for (int i = 0; i < Iterations; ++i) {
16312         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
16313         AddToWorklist(NewEst.getNode());
16314 
16315         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
16316         AddToWorklist(NewEst.getNode());
16317 
16318         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
16319         AddToWorklist(NewEst.getNode());
16320 
16321         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
16322         AddToWorklist(Est.getNode());
16323       }
16324     }
16325     return Est;
16326   }
16327 
16328   return SDValue();
16329 }
16330 
16331 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16332 /// For the reciprocal sqrt, we need to find the zero of the function:
16333 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
16334 ///     =>
16335 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
16336 /// As a result, we precompute A/2 prior to the iteration loop.
16337 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
16338                                          unsigned Iterations,
16339                                          SDNodeFlags Flags, bool Reciprocal) {
16340   EVT VT = Arg.getValueType();
16341   SDLoc DL(Arg);
16342   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
16343 
16344   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
16345   // this entire sequence requires only one FP constant.
16346   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
16347   AddToWorklist(HalfArg.getNode());
16348 
16349   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
16350   AddToWorklist(HalfArg.getNode());
16351 
16352   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
16353   for (unsigned i = 0; i < Iterations; ++i) {
16354     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
16355     AddToWorklist(NewEst.getNode());
16356 
16357     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
16358     AddToWorklist(NewEst.getNode());
16359 
16360     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
16361     AddToWorklist(NewEst.getNode());
16362 
16363     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
16364     AddToWorklist(Est.getNode());
16365   }
16366 
16367   // If non-reciprocal square root is requested, multiply the result by Arg.
16368   if (!Reciprocal) {
16369     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
16370     AddToWorklist(Est.getNode());
16371   }
16372 
16373   return Est;
16374 }
16375 
16376 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16377 /// For the reciprocal sqrt, we need to find the zero of the function:
16378 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
16379 ///     =>
16380 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
16381 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
16382                                          unsigned Iterations,
16383                                          SDNodeFlags Flags, bool Reciprocal) {
16384   EVT VT = Arg.getValueType();
16385   SDLoc DL(Arg);
16386   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
16387   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
16388 
16389   // This routine must enter the loop below to work correctly
16390   // when (Reciprocal == false).
16391   assert(Iterations > 0);
16392 
16393   // Newton iterations for reciprocal square root:
16394   // E = (E * -0.5) * ((A * E) * E + -3.0)
16395   for (unsigned i = 0; i < Iterations; ++i) {
16396     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
16397     AddToWorklist(AE.getNode());
16398 
16399     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
16400     AddToWorklist(AEE.getNode());
16401 
16402     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
16403     AddToWorklist(RHS.getNode());
16404 
16405     // When calculating a square root at the last iteration build:
16406     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
16407     // (notice a common subexpression)
16408     SDValue LHS;
16409     if (Reciprocal || (i + 1) < Iterations) {
16410       // RSQRT: LHS = (E * -0.5)
16411       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
16412     } else {
16413       // SQRT: LHS = (A * E) * -0.5
16414       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
16415     }
16416     AddToWorklist(LHS.getNode());
16417 
16418     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
16419     AddToWorklist(Est.getNode());
16420   }
16421 
16422   return Est;
16423 }
16424 
16425 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
16426 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
16427 /// Op can be zero.
16428 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
16429                                            bool Reciprocal) {
16430   if (Level >= AfterLegalizeDAG)
16431     return SDValue();
16432 
16433   // TODO: Handle half and/or extended types?
16434   EVT VT = Op.getValueType();
16435   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
16436     return SDValue();
16437 
16438   // If estimates are explicitly disabled for this function, we're done.
16439   MachineFunction &MF = DAG.getMachineFunction();
16440   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
16441   if (Enabled == TLI.ReciprocalEstimate::Disabled)
16442     return SDValue();
16443 
16444   // Estimates may be explicitly enabled for this type with a custom number of
16445   // refinement steps.
16446   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
16447 
16448   bool UseOneConstNR = false;
16449   if (SDValue Est =
16450       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
16451                           Reciprocal)) {
16452     AddToWorklist(Est.getNode());
16453 
16454     if (Iterations) {
16455       Est = UseOneConstNR
16456             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
16457             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
16458 
16459       if (!Reciprocal) {
16460         // Unfortunately, Est is now NaN if the input was exactly 0.0.
16461         // Select out this case and force the answer to 0.0.
16462         EVT VT = Op.getValueType();
16463         SDLoc DL(Op);
16464 
16465         SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
16466         EVT CCVT = getSetCCResultType(VT);
16467         SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
16468         AddToWorklist(ZeroCmp.getNode());
16469 
16470         Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
16471                           ZeroCmp, FPZero, Est);
16472         AddToWorklist(Est.getNode());
16473       }
16474     }
16475     return Est;
16476   }
16477 
16478   return SDValue();
16479 }
16480 
16481 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
16482   return buildSqrtEstimateImpl(Op, Flags, true);
16483 }
16484 
16485 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
16486   return buildSqrtEstimateImpl(Op, Flags, false);
16487 }
16488 
16489 /// Return true if base is a frame index, which is known not to alias with
16490 /// anything but itself.  Provides base object and offset as results.
16491 static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
16492                            const GlobalValue *&GV, const void *&CV) {
16493   // Assume it is a primitive operation.
16494   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
16495 
16496   // If it's an adding a simple constant then integrate the offset.
16497   if (Base.getOpcode() == ISD::ADD) {
16498     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
16499       Base = Base.getOperand(0);
16500       Offset += C->getSExtValue();
16501     }
16502   }
16503 
16504   // Return the underlying GlobalValue, and update the Offset.  Return false
16505   // for GlobalAddressSDNode since the same GlobalAddress may be represented
16506   // by multiple nodes with different offsets.
16507   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
16508     GV = G->getGlobal();
16509     Offset += G->getOffset();
16510     return false;
16511   }
16512 
16513   // Return the underlying Constant value, and update the Offset.  Return false
16514   // for ConstantSDNodes since the same constant pool entry may be represented
16515   // by multiple nodes with different offsets.
16516   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
16517     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
16518                                          : (const void *)C->getConstVal();
16519     Offset += C->getOffset();
16520     return false;
16521   }
16522   // If it's any of the following then it can't alias with anything but itself.
16523   return isa<FrameIndexSDNode>(Base);
16524 }
16525 
/// Return true if there is any possibility that the two addresses overlap.
///
/// Works through a sequence of progressively cheaper-to-stronger disproofs:
/// identical pointers, volatility, invariance, BaseIndexOffset decomposition,
/// findBaseOffset decomposition, frame-index offsets, alignment reasoning,
/// and finally (optionally) full alias analysis. Conservatively answers true
/// when nothing disproves aliasing.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
  // If they are the same then they must be aliases.
  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;

  // If they are both volatile then they cannot be reordered.
  if (Op0->isVolatile() && Op1->isVolatile()) return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load /store.
  if (Op0->isInvariant() && Op1->writeMem())
    return false;

  if (Op1->isInvariant() && Op0->writeMem())
    return false;

  // Access widths in bytes (memory VT bit width / 8).
  unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
  unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;

  // Check for BaseIndexOffset matching.
  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr());
  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr());
  int64_t PtrDiff;
  if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
    // Same base+index: alias iff the byte ranges [0, NumBytes0) and
    // [PtrDiff, PtrDiff + NumBytes1) overlap.
    return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));

  // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
  // modified to use BaseIndexOffset.

  // Gather base node and offset information.
  SDValue Base0, Base1;
  int64_t Offset0, Offset1;
  const GlobalValue *GV0, *GV1;
  const void *CV0, *CV1;
  bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(),
                                      Base0, Offset0, GV0, CV0);
  bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(),
                                      Base1, Offset1, GV1, CV1);

  // If they have the same base address, then check to see if they overlap.
  if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1)))
    return !((Offset0 + NumBytes0) <= Offset1 ||
             (Offset1 + NumBytes1) <= Offset0);

  // It is possible for different frame indices to alias each other, mostly
  // when tail call optimization reuses return address slots for arguments.
  // To catch this case, look up the actual index of frame indices to compute
  // the real alias relationship.
  if (IsFrameIndex0 && IsFrameIndex1) {
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    Offset0 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base0)->getIndex());
    Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
    return !((Offset0 + NumBytes0) <= Offset1 ||
             (Offset1 + NumBytes1) <= Offset0);
  }

  // Otherwise, if we know what the bases are, and they aren't identical, then
  // we know they cannot alias.
  if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1))
    return false;

  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
  // compared to the size and offset of the access, we may be able to prove they
  // do not alias. This check is conservative for now to catch cases created by
  // splitting vector types.
  int64_t SrcValOffset0 = Op0->getSrcValueOffset();
  int64_t SrcValOffset1 = Op1->getSrcValueOffset();
  unsigned OrigAlignment0 = Op0->getOriginalAlignment();
  unsigned OrigAlignment1 = Op1->getOriginalAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
    // Positions of each access within its aligned block.
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of similar
    // size. Return no alias.
    if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
        (OffAlign1 + NumBytes1) <= OffAlign0)
      return false;
  }

  // Decide whether to consult full alias analysis: the command-line flag
  // wins if given, otherwise defer to the subtarget's preference.
  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA &&
      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    // Query sizes are extended so both locations are measured from the
    // smaller of the two source-value offsets.
    int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
    AliasResult AAResult =
        AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
16635 
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
///
/// Performs a worklist-driven traversal from OriginalChain toward the entry
/// token. Any chain node that may alias N (or that we cannot see through) is
/// recorded in Aliases; non-aliasing loads/stores, CopyFromReg, and small
/// TokenFactors are skipped over.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  // Non-volatile loads can never be clobbered by other non-volatile loads,
  // so a pair of loads never needs to be treated as aliasing.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  // NOTE: Depth counts total chain-walk steps across the whole traversal
  // (it is never decremented), not the depth of any single path.
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      // Give up: conservatively report only the original chain, which keeps
      // every ordering dependency intact.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here.
      // (Two non-volatile loads never alias for ordering purposes.)
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        // Too wide to expand; treat the whole token factor as an alias.
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    case ISD::CopyFromReg:
      // Forward past CopyFromReg.
      Chains.push_back(Chain.getOperand(0));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}
16722 
16723 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
16724 /// (aliasing node.)
16725 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
16726   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
16727 
16728   // Accumulate all the aliases to this node.
16729   GatherAllAliases(N, OldChain, Aliases);
16730 
16731   // If no operands then chain to entry token.
16732   if (Aliases.size() == 0)
16733     return DAG.getEntryNode();
16734 
16735   // If a single operand then chain to it.  We don't need to revisit it.
16736   if (Aliases.size() == 1)
16737     return Aliases[0];
16738 
16739   // Construct a custom tailored token factor.
16740   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
16741 }
16742 
16743 // This function tries to collect a bunch of potentially interesting
16744 // nodes to improve the chains of, all at once. This might seem
16745 // redundant, as this function gets called when visiting every store
16746 // node, so why not let the work be done on each store as it's visited?
16747 //
16748 // I believe this is mainly important because MergeConsecutiveStores
16749 // is unable to deal with merging stores of different sizes, so unless
16750 // we improve the chains of all the potential candidates up-front
16751 // before running MergeConsecutiveStores, it might only see some of
16752 // the nodes that will eventually be candidates, and then not be able
16753 // to go from a partially-merged state to the desired final
16754 // fully-merged state.
16755 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
16756   // This holds the base pointer, index, and the offset in bytes from the base
16757   // pointer.
16758   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
16759 
16760   // We must have a base and an offset.
16761   if (!BasePtr.getBase().getNode())
16762     return false;
16763 
16764   // Do not handle stores to undef base pointers.
16765   if (BasePtr.getBase().isUndef())
16766     return false;
16767 
16768   SmallVector<StoreSDNode *, 8> ChainedStores;
16769   ChainedStores.push_back(St);
16770 
16771   // Walk up the chain and look for nodes with offsets from the same
16772   // base pointer. Stop when reaching an instruction with a different kind
16773   // or instruction which has a different base pointer.
16774   StoreSDNode *Index = St;
16775   while (Index) {
16776     // If the chain has more than one use, then we can't reorder the mem ops.
16777     if (Index != St && !SDValue(Index, 0)->hasOneUse())
16778       break;
16779 
16780     if (Index->isVolatile() || Index->isIndexed())
16781       break;
16782 
16783     // Find the base pointer and offset for this memory node.
16784     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
16785 
16786     // Check that the base pointer is the same as the original one.
16787     if (!BasePtr.equalBaseIndex(Ptr, DAG))
16788       break;
16789 
16790     // Walk up the chain to find the next store node, ignoring any
16791     // intermediate loads. Any other kind of node will halt the loop.
16792     SDNode *NextInChain = Index->getChain().getNode();
16793     while (true) {
16794       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
16795         // We found a store node. Use it for the next iteration.
16796         if (STn->isVolatile() || STn->isIndexed()) {
16797           Index = nullptr;
16798           break;
16799         }
16800         ChainedStores.push_back(STn);
16801         Index = STn;
16802         break;
16803       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
16804         NextInChain = Ldn->getChain().getNode();
16805         continue;
16806       } else {
16807         Index = nullptr;
16808         break;
16809       }
16810     } // end while
16811   }
16812 
16813   // At this point, ChainedStores lists all of the Store nodes
16814   // reachable by iterating up through chain nodes matching the above
16815   // conditions.  For each such store identified, try to find an
16816   // earlier chain to attach the store to which won't violate the
16817   // required ordering.
16818   bool MadeChangeToSt = false;
16819   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
16820 
16821   for (StoreSDNode *ChainedStore : ChainedStores) {
16822     SDValue Chain = ChainedStore->getChain();
16823     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
16824 
16825     if (Chain != BetterChain) {
16826       if (ChainedStore == St)
16827         MadeChangeToSt = true;
16828       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
16829     }
16830   }
16831 
16832   // Do all replacements after finding the replacements to make to avoid making
16833   // the chains more complicated by introducing new TokenFactors.
16834   for (auto Replacement : BetterChains)
16835     replaceStoreChain(Replacement.first, Replacement.second);
16836 
16837   return MadeChangeToSt;
16838 }
16839 
16840 /// This is the entry point for the file.
16841 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
16842                            CodeGenOpt::Level OptLevel) {
16843   /// This is the main entry point to this class.
16844   DAGCombiner(*this, AA, OptLevel).Run(Level);
16845 }
16846