1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/SelectionDAG.h"
28 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
29 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/KnownBits.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Target/TargetLowering.h"
41 #include "llvm/Target/TargetOptions.h"
42 #include "llvm/Target/TargetRegisterInfo.h"
43 #include "llvm/Target/TargetSubtargetInfo.h"
44 #include <algorithm>
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "dagcombine"
48 
// Statistics reported under -stats; each counter tracks one class of
// transformation performed by this combiner.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
55 
namespace {
  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner's use of IR alias analysis"));

  static cl::opt<bool>
    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
               cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
  static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
               cl::desc("Only use DAG-combiner alias analysis in this"
                        " function"));
#endif

  /// Hidden option to stress test load slicing, i.e., when this option
  /// is enabled, load slicing bypasses most of its profitability guards.
  static cl::opt<bool>
  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                    cl::desc("Bypass the profitability model of load "
                             "slicing"),
                    cl::init(false));

  static cl::opt<bool>
    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                      cl::desc("DAG combiner may split indexing from loads"));

//------------------------------ DAGCombiner ---------------------------------//

  /// Worklist-driven combiner over a SelectionDAG: nodes are pulled off the
  /// worklist, handed to the per-opcode visit routines, and replaced with
  /// simpler equivalents. One instance is driven via Run() per combine
  /// phase (see CombineLevel).
  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    // Combine phase for this run; the constructor starts it at
    // BeforeLegalizeTypes (presumably advanced by Run() for later phases --
    // Run's body is not visible in this chunk).
    CombineLevel Level;
    CodeGenOpt::Level OptLevel;
    // Both initialized to false in the constructor; they gate folds that
    // would create operations/types that are illegal after the
    // corresponding legalization phase.
    bool LegalOperations;
    bool LegalTypes;
    // Set in the constructor from the function's optForSize() attribute.
    bool ForCodeSize;

    /// \brief Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// \brief Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// \brief Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      // Only insert if not already present; the mapped value records N's
      // stable index within Worklist (see WorklistMap's invariant above).
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    // Widest store bit-width that is legal for this target; computed once in
    // the constructor by scanning all simple value types.
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      // Demand every bit of the value; the two-argument overload does the
      // actual work.
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
                         SDValue ExtLoad, const SDLoc &DL,
                         ISD::NodeType ExtType);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitUADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitUSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCE(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertZext(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
                           SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans = true);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue reduceBuildVecToTrunc(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *N);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    /// Match "(X shl/srl V1) & V2" where V2 may not be present.
    bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;
      // Offset from the base ptr.
      int64_t OffsetFromBase;
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);


    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.  LoadedVT returns the type
    /// of the original loaded value.  NarrowLoad returns whether the load would
    /// need to be narrowed in order to match.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
                          bool &NarrowLoad);

    /// Helper function for MergeConsecutiveStores which merges the
    /// component store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for MergeConsecutiveStores. When the
    /// source elements of the consecutive stores are all constants or
    /// all extracted vector elements, try to merge them into one
    /// larger store introducing bitcasts if necessary.  \return True
    /// if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for MergeConsecutiveStores. Stores
    /// that potentially may be merged with St are placed in
    /// StoreNodes.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// candidate stores have indirect dependency through their
    /// operands. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return number of stores that were merged into a merged store (the
    /// affected nodes are stored as a prefix in \p StoreNodes).
    bool MergeConsecutiveStores(StoreSDNode *N);

    /// \brief Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
      ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();

      // Cache the widest legal store width so it only has to be computed
      // once per combiner instance.
      MaximumLegalStoreInBits = 0;
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits();
    }

    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      if (LHSTy.isVector())
        return LHSTy;
      auto &DL = DAG.getDataLayout();
      return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
                        : TLI.getPointerTy(DL);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }
  };
} // end anonymous namespace
546 
547 
548 namespace {
549 /// This class is a DAGUpdateListener that removes any deleted
550 /// nodes from the worklist.
551 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
552   DAGCombiner &DC;
553 public:
554   explicit WorklistRemover(DAGCombiner &dc)
555     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
556 
557   void NodeDeleted(SDNode *N, SDNode *E) override {
558     DC.removeFromWorklist(N);
559   }
560 };
561 }
562 
563 //===----------------------------------------------------------------------===//
564 //  TargetLowering::DAGCombinerInfo implementation
565 //===----------------------------------------------------------------------===//
566 
567 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
568   ((DAGCombiner*)DC)->AddToWorklist(N);
569 }
570 
571 SDValue TargetLowering::DAGCombinerInfo::
572 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
573   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
574 }
575 
576 SDValue TargetLowering::DAGCombinerInfo::
577 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
578   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
579 }
580 
581 
582 SDValue TargetLowering::DAGCombinerInfo::
583 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
584   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
585 }
586 
587 void TargetLowering::DAGCombinerInfo::
588 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
589   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
590 }
591 
592 //===----------------------------------------------------------------------===//
593 // Helper Functions
594 //===----------------------------------------------------------------------===//
595 
596 void DAGCombiner::deleteAndRecombine(SDNode *N) {
597   removeFromWorklist(N);
598 
599   // If the operands of this node are only used by the node, they will now be
600   // dead. Make sure to re-visit them and recursively delete dead nodes.
601   for (const SDValue &Op : N->ops())
602     // For an operand generating multiple values, one of the values may
603     // become dead allowing further simplification (e.g. split index
604     // arithmetic from an indexed load).
605     if (Op->hasOneUse() || Op->getNumValues() > 1)
606       AddToWorklist(Op.getNode());
607 
608   DAG.DeleteNode(N);
609 }
610 
611 /// Return 1 if we can compute the negated form of the specified expression for
612 /// the same cost as the expression itself, or 2 if we can compute the negated
613 /// form more cheaply than the expression itself.
614 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
615                                const TargetLowering &TLI,
616                                const TargetOptions *Options,
617                                unsigned Depth = 0) {
618   // fneg is removable even if it has multiple uses.
619   if (Op.getOpcode() == ISD::FNEG) return 2;
620 
621   // Don't allow anything with multiple uses.
622   if (!Op.hasOneUse()) return 0;
623 
624   // Don't recurse exponentially.
625   if (Depth > 6) return 0;
626 
627   switch (Op.getOpcode()) {
628   default: return false;
629   case ISD::ConstantFP: {
630     if (!LegalOperations)
631       return 1;
632 
633     // Don't invert constant FP values after legalization unless the target says
634     // the negated constant is legal.
635     EVT VT = Op.getValueType();
636     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
637       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
638   }
639   case ISD::FADD:
640     // FIXME: determine better conditions for this xform.
641     if (!Options->UnsafeFPMath) return 0;
642 
643     // After operation legalization, it might not be legal to create new FSUBs.
644     if (LegalOperations &&
645         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
646       return 0;
647 
648     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
649     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
650                                     Options, Depth + 1))
651       return V;
652     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
653     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
654                               Depth + 1);
655   case ISD::FSUB:
656     // We can't turn -(A-B) into B-A when we honor signed zeros.
657     if (!Options->NoSignedZerosFPMath &&
658         !Op.getNode()->getFlags().hasNoSignedZeros())
659       return 0;
660 
661     // fold (fneg (fsub A, B)) -> (fsub B, A)
662     return 1;
663 
664   case ISD::FMUL:
665   case ISD::FDIV:
666     if (Options->HonorSignDependentRoundingFPMath()) return 0;
667 
668     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
669     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
670                                     Options, Depth + 1))
671       return V;
672 
673     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
674                               Depth + 1);
675 
676   case ISD::FP_EXTEND:
677   case ISD::FP_ROUND:
678   case ISD::FSIN:
679     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
680                               Depth + 1);
681   }
682 }
683 
/// If isNegatibleForFree returns true, return the newly negated expression.
/// This must stay in lock-step with isNegatibleForFree: it only handles the
/// opcodes that function approves, and asserts on anything else.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  // Don't allow anything with multiple uses.
  assert(Op.hasOneUse() && "Unknown reuse!");

  // The recursion bound must match the one in isNegatibleForFree, otherwise
  // we could be asked to negate an expression it never approved.
  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  // Carry the original node's fast-math flags onto the replacement nodes.
  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    // Negate the constant directly by flipping its sign.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    // FIXME: determine better conditions for this xform.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    // Prefer negating whichever operand is free to negate.
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    assert(!Options.HonorSignDependentRoundingFPMath());

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // The negation commutes with these unary ops: negate the operand instead.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
      // FP_ROUND carries an extra truncation-flag operand that must be kept.
      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));
  }
}
761 
762 // APInts must be the same size for most operations, this helper
763 // function zero extends the shorter of the pair so that they match.
764 // We provide an Offset so that we can create bitwidths that won't overflow.
765 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
766   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
767   LHS = LHS.zextOrSelf(Bits);
768   RHS = RHS.zextOrSelf(Bits);
769 }
770 
771 // Return true if this node is a setcc, or is a select_cc
772 // that selects between the target values used for true and false, making it
773 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
774 // the appropriate nodes based on the type of node we are checking. This
775 // simplifies life a bit for the callers.
776 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
777                                     SDValue &CC) const {
778   if (N.getOpcode() == ISD::SETCC) {
779     LHS = N.getOperand(0);
780     RHS = N.getOperand(1);
781     CC  = N.getOperand(2);
782     return true;
783   }
784 
785   if (N.getOpcode() != ISD::SELECT_CC ||
786       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
787       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
788     return false;
789 
790   if (TLI.getBooleanContents(N.getValueType()) ==
791       TargetLowering::UndefinedBooleanContent)
792     return false;
793 
794   LHS = N.getOperand(0);
795   RHS = N.getOperand(1);
796   CC  = N.getOperand(4);
797   return true;
798 }
799 
800 /// Return true if this is a SetCC-equivalent operation with only one use.
801 /// If this is true, it allows the users to invert the operation for free when
802 /// it is profitable to do so.
803 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
804   SDValue N0, N1, N2;
805   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
806     return true;
807   return false;
808 }
809 
810 // \brief Returns the SDNode if it is a constant float BuildVector
811 // or constant float.
812 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
813   if (isa<ConstantFPSDNode>(N))
814     return N.getNode();
815   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
816     return N.getNode();
817   return nullptr;
818 }
819 
820 // Determines if it is a constant integer or a build vector of constant
821 // integers (and undefs).
822 // Do not permit build vector implicit truncation.
823 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
824   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
825     return !(Const->isOpaque() && NoOpaques);
826   if (N.getOpcode() != ISD::BUILD_VECTOR)
827     return false;
828   unsigned BitWidth = N.getScalarValueSizeInBits();
829   for (const SDValue &Op : N->op_values()) {
830     if (Op.isUndef())
831       continue;
832     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
833     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
834         (Const->isOpaque() && NoOpaques))
835       return false;
836   }
837   return true;
838 }
839 
840 // Determines if it is a constant null integer or a splatted vector of a
841 // constant null integer (with no undefs).
842 // Build vector implicit truncation is not an issue for null values.
843 static bool isNullConstantOrNullSplatConstant(SDValue N) {
844   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
845     return Splat->isNullValue();
846   return false;
847 }
848 
849 // Determines if it is a constant integer of one or a splatted vector of a
850 // constant integer of one (with no undefs).
851 // Do not permit build vector implicit truncation.
852 static bool isOneConstantOrOneSplatConstant(SDValue N) {
853   unsigned BitWidth = N.getScalarValueSizeInBits();
854   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
855     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
856   return false;
857 }
858 
859 // Determines if it is a constant integer of all ones or a splatted vector of a
860 // constant integer of all ones (with no undefs).
861 // Do not permit build vector implicit truncation.
862 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
863   unsigned BitWidth = N.getScalarValueSizeInBits();
864   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
865     return Splat->isAllOnesValue() &&
866            Splat->getAPIntValue().getBitWidth() == BitWidth;
867   return false;
868 }
869 
870 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
871 // undef's.
872 static bool isAnyConstantBuildVector(const SDNode *N) {
873   return ISD::isBuildVectorOfConstantSDNodes(N) ||
874          ISD::isBuildVectorOfConstantFPSDNodes(N);
875 }
876 
877 // Attempt to match a unary predicate against a scalar/splat constant or
878 // every element of a constant BUILD_VECTOR.
879 static bool matchUnaryPredicate(SDValue Op,
880                                 std::function<bool(ConstantSDNode *)> Match) {
881   if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
882     return Match(Cst);
883 
884   if (ISD::BUILD_VECTOR != Op.getOpcode())
885     return false;
886 
887   EVT SVT = Op.getValueType().getScalarType();
888   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
889     auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
890     if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
891       return false;
892   }
893   return true;
894 }
895 
896 // Attempt to match a binary predicate against a pair of scalar/splat constants
897 // or every element of a pair of constant BUILD_VECTORs.
898 static bool matchBinaryPredicate(
899     SDValue LHS, SDValue RHS,
900     std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
901   if (LHS.getValueType() != RHS.getValueType())
902     return false;
903 
904   if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
905     if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
906       return Match(LHSCst, RHSCst);
907 
908   if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
909       ISD::BUILD_VECTOR != RHS.getOpcode())
910     return false;
911 
912   EVT SVT = LHS.getValueType().getScalarType();
913   for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
914     auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
915     auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
916     if (!LHSCst || !RHSCst)
917       return false;
918     if (LHSCst->getValueType(0) != SVT ||
919         LHSCst->getValueType(0) != RHSCst->getValueType(0))
920       return false;
921     if (!Match(LHSCst, RHSCst))
922       return false;
923   }
924   return true;
925 }
926 
// Reassociate a commutative/associative binary op (Opc) applied to N0 and N1
// so that constant operands are grouped together and can be folded. Returns
// the reassociated value, or a null SDValue if no reassociation applies.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1) {
  EVT VT = N0.getValueType();
  // Case 1: constant sits inside N0's subtree.
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        // Both constants but the fold failed; give up rather than loop.
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  // Case 2: mirrored form — constant sits inside N1's subtree.
  if (N1.getOpcode() == Opc) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  // No reassociation opportunity found.
  return SDValue();
}
972 
// Replace all NumTo result values of N with the corresponding entries of To.
// If AddTo is set, push the replacement nodes and their users onto the
// worklist so they are revisited. Deletes N if it becomes dead.
// Returns SDValue(N, 0) as the conventional "combined" marker.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.1 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        To[0].getNode()->dump(&DAG);
        dbgs() << " and " << NumTo-1 << " other values\n");
  // Each replacement value must match the type of the result it replaces.
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // Keep the worklist consistent while RAUW may delete nodes.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1006 
// Apply a replacement computed by TargetLowering (TLO.Old -> TLO.New) to the
// DAG and keep the combiner worklist in sync with the resulting changes.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
1024 
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
/// On success the replacement computed by TargetLowering has already been
/// committed to the DAG.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  // Let TargetLowering look for a simplification given the demanded bits.
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
1047 
// Replace the uses of a load with a wider (promoted) extending load: value
// uses get a TRUNCATE of the promoted load back to the original type, and
// chain uses are redirected to the promoted load's chain result.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Value result (0) -> truncated promoted value; chain result (1) -> new chain.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
1064 
// Return Op widened to the promoted type PVT, or a null SDValue if no
// sensible promotion exists. Replace is set to true when the result is a new
// extending load and the caller must also replace the original load node
// (see ReplaceLoadWithPromotedLoad).
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // Non-extending load: prefer a legal ZEXTLOAD, else ANY-extend; an
    // already-extending load keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Sign-extend the asserted operand, then re-assert on the wide value.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    // Zero-extend the asserted operand, then re-assert on the wide value.
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Fallback: any-extend, when the target supports it at this width.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1103 
// Promote Op to PVT and sign-extend the result in-register from the original
// type, so the high bits are well-defined sign bits. Returns a null SDValue
// if promotion is not possible.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // If promotion produced a new extending load, retire the old load.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1120 
// Promote Op to PVT and zero-extend the result in-register from the original
// type, so the high bits are known zero. Returns a null SDValue if promotion
// is not possible.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // If promotion produced a new extending load, retire the old load.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
1134 
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer operations are candidates for promotion.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // Promote both operands, recording whether each came from a load that
    // needs replacing.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the op in the wide type, then truncate back to VT.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    // Returning Op signals that CombineTo already handled the replacement.
    return Op;
  }
  return SDValue();
}
1199 
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer shifts are candidates for promotion.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // Only the shifted value needs promoting; the shift amount is kept as-is.
    // The extension kind must match the shift: SRA needs sign bits, SRL needs
    // zero bits, anything else can take an any-extend.
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    // Shift in the wide type, then truncate back to VT.
    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1252 
// Promote an extension node (aext/zext/sext) to a wider result type when the
// target says the current type is undesirable. The promoted extend absorbs
// the original one.
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer extensions are candidates for promotion.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}
1281 
// Promote an unindexed load to a wider type when the target says the current
// type is undesirable. Value uses receive a truncate of the promoted load;
// chain uses are redirected. Returns true if the load was replaced.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  // Only scalar integer loads are candidates for promotion.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // Non-extending load: prefer a legal ZEXTLOAD, else ANY-extend; an
    // already-extending load keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Value result (0) -> truncated promoted value; chain (1) -> new chain.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1332 
/// \brief Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  // Worklist of candidates; SmallSetVector dedupes shared operands.
  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      // Queue the operands before deleting: they may become dead too.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still used: it lost a user, so it may be combinable now — revisit it.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}
1362 
1363 //===----------------------------------------------------------------------===//
1364 //  Main DAG Combiner implementation
1365 //===----------------------------------------------------------------------===//
1366 
// Main driver: repeatedly pop nodes from the worklist and try to combine each
// one until the worklist is empty, keeping the DAG root alive and legal (when
// running post-legalization) throughout.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Nodes touched by legalization need to be revisited.
      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // Legalization consumed N; nothing more to combine here.
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // Null result: no combine applied to N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // Result counts differ: only legal for single-result nodes of the
      // same type, replaced value-by-value.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1471 
/// Dispatch \p N to its opcode-specific combine routine. Returns the
/// replacement value if a fold was performed, or a null SDValue if this
/// opcode has no dedicated visitor or the visitor made no change.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  // SREM and UREM share one visitor; it inspects the opcode itself.
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  // All four integer min/max flavors share one visitor.
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCE:             return visitSETCCE(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertZext:         return visitAssertZext(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1577 
/// Try to simplify \p N through a sequence of fallbacks: the generic opcode
/// visitors, then the target's combine hook, then integer-operation
/// promotion, and finally CSE against an already-existing commuted copy of
/// the node. Returns the replacement value, or a null SDValue if nothing
/// changed.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    // The target hook runs for target-specific opcodes, and for generic
    // opcodes the target has explicitly registered interest in.
    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad mutates the DAG itself; signal success by returning N.
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try eliminate it if the commuted
  // version is already present in the DAG.
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS. Only look for the commuted
    // node when that would not move a constant off the RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}
1645 
1646 /// Given a node, return its input chain if it has one, otherwise return a null
1647 /// sd operand.
1648 static SDValue getInputChainForNode(SDNode *N) {
1649   if (unsigned NumOps = N->getNumOperands()) {
1650     if (N->getOperand(0).getValueType() == MVT::Other)
1651       return N->getOperand(0);
1652     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1653       return N->getOperand(NumOps-1);
1654     for (unsigned i = 1; i < NumOps-1; ++i)
1655       if (N->getOperand(i).getValueType() == MVT::Other)
1656         return N->getOperand(i);
1657   }
1658   return SDValue();
1659 }
1660 
/// Combine a TokenFactor node: drop redundant/duplicate chain operands,
/// merge in single-use nested TokenFactors, and prune operands that are
/// transitively reachable through another operand's chain.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // NOTE: this lambda intentionally shadows DAGCombiner::AddToWorklist; it
  // queues chain nodes for the pruning search below, not for recombining.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // The search is capped at 1024 worklist entries to bound compile time.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops for the search to prune anything.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting
      // another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the operands that the search above did not reach through
        // some other operand's chain.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1819 
/// MERGE_VALUES can always be eliminated: every result value is replaced
/// directly by the corresponding operand.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}
1836 
1837 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1838 /// ConstantSDNode pointer else nullptr.
1839 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1840   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1841   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1842 }
1843 
1844 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1845   auto BinOpcode = BO->getOpcode();
1846   assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1847           BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1848           BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1849           BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1850           BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1851           BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1852           BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1853           BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1854           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1855          "Unexpected binary operator");
1856 
1857   // Bail out if any constants are opaque because we can't constant fold those.
1858   SDValue C1 = BO->getOperand(1);
1859   if (!isConstantOrConstantVector(C1, true) &&
1860       !isConstantFPBuildVectorOrConstantFP(C1))
1861     return SDValue();
1862 
1863   // Don't do this unless the old select is going away. We want to eliminate the
1864   // binary operator, not replace a binop with a select.
1865   // TODO: Handle ISD::SELECT_CC.
1866   SDValue Sel = BO->getOperand(0);
1867   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1868     return SDValue();
1869 
1870   SDValue CT = Sel.getOperand(1);
1871   if (!isConstantOrConstantVector(CT, true) &&
1872       !isConstantFPBuildVectorOrConstantFP(CT))
1873     return SDValue();
1874 
1875   SDValue CF = Sel.getOperand(2);
1876   if (!isConstantOrConstantVector(CF, true) &&
1877       !isConstantFPBuildVectorOrConstantFP(CF))
1878     return SDValue();
1879 
1880   // We have a select-of-constants followed by a binary operator with a
1881   // constant. Eliminate the binop by pulling the constant math into the select.
1882   // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1883   EVT VT = Sel.getValueType();
1884   SDLoc DL(Sel);
1885   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1886   assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
1887           isConstantFPBuildVectorOrConstantFP(NewCT)) &&
1888          "Failed to constant fold a binop with constant operands");
1889 
1890   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1891   assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
1892           isConstantFPBuildVectorOrConstantFP(NewCF)) &&
1893          "Failed to constant fold a binop with constant operands");
1894 
1895   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1896 }
1897 
/// Combine an ADD node: constant folding and canonicalization, identity and
/// undef operands, add/sub reassociation patterns, add->or conversion, and
/// finally the asymmetric folds in visitADDLike (tried in both operand
/// orders).
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneConstantOrOneSplatConstant(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // SimplifyDemandedBits may morph the node in place; returning N signals
  // to the caller that it changed.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // Asymmetric folds, tried with both operand orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2045 
2046 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2047   bool Masked = false;
2048 
2049   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2050   while (true) {
2051     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2052       V = V.getOperand(0);
2053       continue;
2054     }
2055 
2056     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2057       Masked = true;
2058       V = V.getOperand(0);
2059       continue;
2060     }
2061 
2062     break;
2063   }
2064 
2065   // If this is not a carry, return.
2066   if (V.getResNo() != 1)
2067     return SDValue();
2068 
2069   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2070       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2071     return SDValue();
2072 
2073   // If the result is masked, then no matter what kind of bool it is we can
2074   // return. If it isn't, then we need to make sure the bool type is either 0 or
2075   // 1 and not other values.
2076   if (Masked ||
2077       TLI.getBooleanContents(V.getValueType()) ==
2078           TargetLoweringBase::ZeroOrOneBooleanContent)
2079     return V;
2080 
2081   return SDValue();
2082 }
2083 
/// ADD folds that are not symmetric in (N0, N1); visitADD calls this twice
/// with the operands swapped. \p LocReference supplies the debug location for
/// any new nodes.
SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NumSignBits == DestBits means every bit of AndOp0 equals the sign bit,
    // so AndOp0 is all-ones or all-zeros; (and AndOp0, 1) is then -AndOp0.
    if (NumSignBits == DestBits &&
        isOneConstantOrOneSplatConstant(N1->getOperand(1)))
      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
2140 
2141 SDValue DAGCombiner::visitADDC(SDNode *N) {
2142   SDValue N0 = N->getOperand(0);
2143   SDValue N1 = N->getOperand(1);
2144   EVT VT = N0.getValueType();
2145   SDLoc DL(N);
2146 
2147   // If the flag result is dead, turn this into an ADD.
2148   if (!N->hasAnyUseOfValue(1))
2149     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2150                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2151 
2152   // canonicalize constant to RHS.
2153   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2154   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2155   if (N0C && !N1C)
2156     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2157 
2158   // fold (addc x, 0) -> x + no carry out
2159   if (isNullConstant(N1))
2160     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2161                                         DL, MVT::Glue));
2162 
2163   // If it cannot overflow, transform into an add.
2164   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2165     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2166                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2167 
2168   return SDValue();
2169 }
2170 
/// Combine a UADDO node (add with an unsigned-overflow flag result).
SDValue DAGCombiner::visitUADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  // These folds are only written for scalar UADDO.
  if (VT.isVector())
    return SDValue();

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);

  // fold (uaddo x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getConstant(0, DL, CarryVT));

  // Asymmetric folds, tried with both operand orders.
  if (SDValue Combined = visitUADDOLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitUADDOLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2209 
/// UADDO folds that are not symmetric in (N0, N1); visitUADDO calls this
/// twice with the operands swapped.
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  auto VT = N0.getValueType();

  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  // If Y + 1 cannot overflow.
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}
2231 
2232 SDValue DAGCombiner::visitADDE(SDNode *N) {
2233   SDValue N0 = N->getOperand(0);
2234   SDValue N1 = N->getOperand(1);
2235   SDValue CarryIn = N->getOperand(2);
2236 
2237   // canonicalize constant to RHS
2238   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2239   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2240   if (N0C && !N1C)
2241     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2242                        N1, N0, CarryIn);
2243 
2244   // fold (adde x, y, false) -> (addc x, y)
2245   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2246     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2247 
2248   return SDValue();
2249 }
2250 
/// Combine an ADDCARRY node: canonicalize constants to the RHS, degrade to
/// UADDO when the incoming carry is known false, fold the all-zero-operand
/// form, and then try the asymmetric folds in both operand orders.
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn))
    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    EVT CarryVT = CarryIn.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // Asymmetric folds, tried with both operand orders.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}
2286 
// Asymmetric ADDCARRY combines; the caller invokes this with both operand
// orders (N0/N1 then N1/N0) so each pattern only needs to match one way.
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   *
   * We are trying to get:
   *   (addcarry X, 0, (addcarry A, B, Z):Carry)
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    /**
     *            (uaddo A, B)
     *             /       \
     *          Carry      Sum
     *            |          \
     *            | (addcarry *, 0, Z)
     *            |       /
     *             \   Carry
     *              |   /
     * (addcarry X, *, *)
     */
    // Match: Y is the carry of a uaddo whose sum feeds an (addcarry Sum, 0, Z)
    // whose carry is this node's CarryIn. The rewrite folds Z directly into
    // the (A + B) addition so the carries chain linearly.
    if (Y.getOpcode() == ISD::UADDO &&
        CarryIn.getResNo() == 1 &&
        CarryIn.getOpcode() == ISD::ADDCARRY &&
        isNullConstant(CarryIn.getOperand(1)) &&
        CarryIn.getOperand(0) == Y.getValue(0)) {
      // NewY recomputes A + B with carry-in Z; its carry-out replaces the two
      // chained carries feeding this node.
      auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
                              Y.getOperand(0), Y.getOperand(1),
                              CarryIn.getOperand(2));
      AddToWorklist(NewY.getNode());
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), N0.getValueType()),
                         NewY.getValue(1));
    }
  }

  return SDValue();
}
2334 
2335 // Since it may not be valid to emit a fold to zero for vector initializers
2336 // check if we can before folding.
2337 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2338                              SelectionDAG &DAG, bool LegalOperations,
2339                              bool LegalTypes) {
2340   if (!VT.isVector())
2341     return DAG.getConstant(0, DL, VT);
2342   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2343     return DAG.getConstant(0, DL, VT);
2344   return SDValue();
2345 }
2346 
// Combine an integer SUB node. The folds below are ordered deliberately:
// cheap identities first, then constant folding, then the pattern rewrites.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // Folds for negation: 0 - X (scalar or splat-vector zero LHS).
  if (isNullConstantOrNullSplatConstant(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstantOrAllOnesSplatConstant(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  // An i1 sign-extended-in-register is 0 or -1, so subtracting it is the same
  // as adding its zero-extended (0 or 1) form.
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}
2494 
2495 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2496   SDValue N0 = N->getOperand(0);
2497   SDValue N1 = N->getOperand(1);
2498   EVT VT = N0.getValueType();
2499   SDLoc DL(N);
2500 
2501   // If the flag result is dead, turn this into an SUB.
2502   if (!N->hasAnyUseOfValue(1))
2503     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2504                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2505 
2506   // fold (subc x, x) -> 0 + no borrow
2507   if (N0 == N1)
2508     return CombineTo(N, DAG.getConstant(0, DL, VT),
2509                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2510 
2511   // fold (subc x, 0) -> x + no borrow
2512   if (isNullConstant(N1))
2513     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2514 
2515   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2516   if (isAllOnesConstant(N0))
2517     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2518                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2519 
2520   return SDValue();
2521 }
2522 
2523 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2524   SDValue N0 = N->getOperand(0);
2525   SDValue N1 = N->getOperand(1);
2526   EVT VT = N0.getValueType();
2527   if (VT.isVector())
2528     return SDValue();
2529 
2530   EVT CarryVT = N->getValueType(1);
2531   SDLoc DL(N);
2532 
2533   // If the flag result is dead, turn this into an SUB.
2534   if (!N->hasAnyUseOfValue(1))
2535     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2536                      DAG.getUNDEF(CarryVT));
2537 
2538   // fold (usubo x, x) -> 0 + no borrow
2539   if (N0 == N1)
2540     return CombineTo(N, DAG.getConstant(0, DL, VT),
2541                      DAG.getConstant(0, DL, CarryVT));
2542 
2543   // fold (usubo x, 0) -> x + no borrow
2544   if (isNullConstant(N1))
2545     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2546 
2547   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2548   if (isAllOnesConstant(N0))
2549     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2550                      DAG.getConstant(0, DL, CarryVT));
2551 
2552   return SDValue();
2553 }
2554 
2555 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2556   SDValue N0 = N->getOperand(0);
2557   SDValue N1 = N->getOperand(1);
2558   SDValue CarryIn = N->getOperand(2);
2559 
2560   // fold (sube x, y, false) -> (subc x, y)
2561   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2562     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2563 
2564   return SDValue();
2565 }
2566 
2567 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2568   SDValue N0 = N->getOperand(0);
2569   SDValue N1 = N->getOperand(1);
2570   SDValue CarryIn = N->getOperand(2);
2571 
2572   // fold (subcarry x, y, false) -> (usubo x, y)
2573   if (isNullConstant(CarryIn))
2574     return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2575 
2576   return SDValue();
2577 }
2578 
// Combine an integer MUL node: constant folding, identities, and rewrites of
// multiplies by powers of two into shifts.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // For vectors, only splat constants participate in the folds below.
    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    // The shift amount may need a narrower/wider type than the value.
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    // Only commit the rewrite if (c2 << c1) actually folded to a constant.
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  return SDValue();
}
2717 
2718 /// Return true if divmod libcall is available.
2719 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2720                                      const TargetLowering &TLI) {
2721   RTLIB::Libcall LC;
2722   EVT NodeType = Node->getValueType(0);
2723   if (!NodeType.isSimple())
2724     return false;
2725   switch (NodeType.getSimpleVT().SimpleTy) {
2726   default: return false; // No libcall for vector types.
2727   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2728   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2729   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2730   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2731   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2732   }
2733 
2734   return TLI.getLibcallName(LC) != nullptr;
2735 }
2736 
/// Issue divrem if both quotient and remainder are needed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Walk the users of the first operand looking for sibling div/rem nodes on
  // the same (Op0, Op1) pair and fold them all into one DIVREM.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE;) {
    // Advance before any CombineTo below can mutate the use list.
    SDNode *User = *UI++;
    if (User == Node || User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        // Lazily create (or reuse) the DIVREM node on the first match.
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Redirect div users to result 0 and rem users to result 1.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
2806 
2807 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
2808   SDValue N0 = N->getOperand(0);
2809   SDValue N1 = N->getOperand(1);
2810   EVT VT = N->getValueType(0);
2811   SDLoc DL(N);
2812 
2813   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2814     return DAG.getUNDEF(VT);
2815 
2816   // undef / X -> 0
2817   // undef % X -> 0
2818   if (N0.isUndef())
2819     return DAG.getConstant(0, DL, VT);
2820 
2821   return SDValue();
2822 }
2823 
// Combine a signed integer divide: constant folds, strength reduction to
// udiv/shifts, and the generic sdiv-by-power-of-2 expansion.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
                                    (-N1C->getAPIntValue()).isPowerOf2())) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, DL, VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    // Rounds a negative dividend toward zero before the arithmetic shift.
    SDValue SRL =
        DAG.getNode(ISD::SRL, DL, VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
                  DAG.getConstant(lg2, DL,
                                  getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
2917 
// Combine an unsigned integer divide: constant folds and strength reduction
// of power-of-2 divisors into right shifts.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    // The shift amount may need a different type than the value.
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
2988 
// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // To avoid mangling nodes, this simplification requires that the combine()
  // call for the speculative DIV must not cause a DIVREM conversion.  We guard
  // against this by skipping the simplification if isIntDivCheap().  When
  // div is not cheap, combine will not return a DIVREM.  Regardless,
  // checking cheapness here makes sense since the simplification results in
  // fatter code.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    // Build a speculative divide and let the combiner try to simplify it.
    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
             (OptimizedDiv.getOpcode() != ISD::SDIVREM));
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
3064 
3065 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3066   SDValue N0 = N->getOperand(0);
3067   SDValue N1 = N->getOperand(1);
3068   EVT VT = N->getValueType(0);
3069   SDLoc DL(N);
3070 
3071   // fold (mulhs x, 0) -> 0
3072   if (isNullConstant(N1))
3073     return N1;
3074   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3075   if (isOneConstant(N1)) {
3076     SDLoc DL(N);
3077     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3078                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3079                                        getShiftAmountTy(N0.getValueType())));
3080   }
3081   // fold (mulhs x, undef) -> 0
3082   if (N0.isUndef() || N1.isUndef())
3083     return DAG.getConstant(0, SDLoc(N), VT);
3084 
3085   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3086   // plus a shift.
3087   if (VT.isSimple() && !VT.isVector()) {
3088     MVT Simple = VT.getSimpleVT();
3089     unsigned SimpleSize = Simple.getSizeInBits();
3090     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3091     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3092       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3093       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3094       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3095       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3096             DAG.getConstant(SimpleSize, DL,
3097                             getShiftAmountTy(N1.getValueType())));
3098       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3099     }
3100   }
3101 
3102   return SDValue();
3103 }
3104 
3105 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3106   SDValue N0 = N->getOperand(0);
3107   SDValue N1 = N->getOperand(1);
3108   EVT VT = N->getValueType(0);
3109   SDLoc DL(N);
3110 
3111   // fold (mulhu x, 0) -> 0
3112   if (isNullConstant(N1))
3113     return N1;
3114   // fold (mulhu x, 1) -> 0
3115   if (isOneConstant(N1))
3116     return DAG.getConstant(0, DL, N0.getValueType());
3117   // fold (mulhu x, undef) -> 0
3118   if (N0.isUndef() || N1.isUndef())
3119     return DAG.getConstant(0, DL, VT);
3120 
3121   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3122   // plus a shift.
3123   if (VT.isSimple() && !VT.isVector()) {
3124     MVT Simple = VT.getSimpleVT();
3125     unsigned SimpleSize = Simple.getSizeInBits();
3126     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3127     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3128       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3129       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3130       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3131       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3132             DAG.getConstant(SimpleSize, DL,
3133                             getShiftAmountTy(N1.getValueType())));
3134       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3135     }
3136   }
3137 
3138   return SDValue();
3139 }
3140 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Returns
/// the combined value (via CombineTo) on success, or a null SDValue if no
/// simplification applied.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    // Replace both results with the single-result node; the dead high half
    // gets the same replacement, which is harmless since it has no uses.
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  // NOTE(review): this branch uses isOperationLegal while the low-half branch
  // above accepts Custom as well — presumably intentional, but worth
  // confirming the asymmetry.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    // Speculatively build the single-result low op and see if combine() can
    // simplify it to something new and legal.
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    // Same speculative-combine approach for the high half.
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
3191 
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  // First try the generic two-result simplifications (dead-half elimination,
  // per-half recombination).
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If a type twice as wide is legal, lower the smul_lohi to a single wider
  // multiply: the low result is the truncated product, the high result is the
  // product shifted right by the original bit width.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result value 1).
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result value 0).
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
3222 
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  // First try the generic two-result simplifications (dead-half elimination,
  // per-half recombination).
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If a type twice as wide is legal, lower the umul_lohi to a single wider
  // multiply: the low result is the truncated product, the high result is the
  // product shifted right by the original bit width.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result value 1).
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result value 0).
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
3253 
3254 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3255   // (smulo x, 2) -> (saddo x, x)
3256   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3257     if (C2->getAPIntValue() == 2)
3258       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3259                          N->getOperand(0), N->getOperand(0));
3260 
3261   return SDValue();
3262 }
3263 
3264 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3265   // (umulo x, 2) -> (uaddo x, x)
3266   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3267     if (C2->getAPIntValue() == 2)
3268       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3269                          N->getOperand(0), N->getOperand(0));
3270 
3271   return SDValue();
3272 }
3273 
3274 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3275   SDValue N0 = N->getOperand(0);
3276   SDValue N1 = N->getOperand(1);
3277   EVT VT = N0.getValueType();
3278 
3279   // fold vector ops
3280   if (VT.isVector())
3281     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3282       return FoldedVOp;
3283 
3284   // fold operation with constant operands.
3285   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3286   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3287   if (N0C && N1C)
3288     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3289 
3290   // canonicalize constant to RHS
3291   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3292      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3293     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3294 
3295   return SDValue();
3296 }
3297 
/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it by hoisting the common inner operation (extend, shift, bitcast,
/// or shuffle) past the logic op. Returns a null SDValue if no fold applies.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  // (N1 has the same opcode as N0 per the assert above, so checking N0's
  // operand count suffices.)
  if (N0.getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    // Build the logic op on the narrow/inner operands, then re-apply the
    // common outer operation once.
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    // Both sides share the second operand (shift amount / mask), so the logic
    // op can be applied to the first operands and the shared op re-applied.
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // (For XOR, the pass-through lane must be zero, since x ^ x == 0.)
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    SVN0->getMask());
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // Same check as above, but for a shared *first* shuffle operand.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    SVN0->getMask());
      }
    }
  }

  return SDValue();
}
3450 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
/// \p IsAnd selects between the AND and OR forms of the folds; \p N0 / \p N1
/// are the two setcc-equivalent operands of the logic op. Returns a null
/// SDValue if no fold applies.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  // Decompose both operands into (LHS, RHS, condition-code) triples.
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares share the same RHS and predicate: try merging the LHS
  // values with a single OR/AND before one compare.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullConstantOrNullSplatConstant(LR);
    bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // Adding 1 wraps 0 -> 1 and -1 -> 0, so "X is neither 0 nor -1" becomes an
  // unsigned range check X+1 >= 2.
  if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
3569 
3570 /// This contains all DAGCombine rules which reduce two values combined by
3571 /// an And operation to a single value. This makes them reusable in the context
3572 /// of visitSELECT(). Rules involving constants are not included as
3573 /// visitSELECT() already handles those cases.
3574 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3575   EVT VT = N1.getValueType();
3576   SDLoc DL(N);
3577 
3578   // fold (and x, undef) -> 0
3579   if (N0.isUndef() || N1.isUndef())
3580     return DAG.getConstant(0, DL, VT);
3581 
3582   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3583     return V;
3584 
3585   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3586       VT.getSizeInBits() <= 64) {
3587     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3588       APInt ADDC = ADDI->getAPIntValue();
3589       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3590         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3591         // immediate for an add, but it is legal if its top c2 bits are set,
3592         // transform the ADD so the immediate doesn't need to be materialized
3593         // in a register.
3594         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3595           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3596                                              SRLI->getZExtValue());
3597           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3598             ADDC |= Mask;
3599             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3600               SDLoc DL0(N0);
3601               SDValue NewAdd =
3602                 DAG.getNode(ISD::ADD, DL0, VT,
3603                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3604               CombineTo(N0.getNode(), NewAdd);
3605               // Return N so it doesn't get rechecked!
3606               return SDValue(N, 0);
3607             }
3608           }
3609         }
3610       }
3611     }
3612   }
3613 
3614   // Reduce bit extract of low half of an integer to the narrower type.
3615   // (and (srl i64:x, K), KMask) ->
3616   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3617   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3618     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3619       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3620         unsigned Size = VT.getSizeInBits();
3621         const APInt &AndMask = CAnd->getAPIntValue();
3622         unsigned ShiftBits = CShift->getZExtValue();
3623 
3624         // Bail out, this node will probably disappear anyway.
3625         if (ShiftBits == 0)
3626           return SDValue();
3627 
3628         unsigned MaskBits = AndMask.countTrailingOnes();
3629         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3630 
3631         if (AndMask.isMask() &&
3632             // Required bits must not span the two halves of the integer and
3633             // must fit in the half size type.
3634             (ShiftBits + MaskBits <= Size / 2) &&
3635             TLI.isNarrowingProfitable(VT, HalfVT) &&
3636             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3637             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3638             TLI.isTruncateFree(VT, HalfVT) &&
3639             TLI.isZExtFree(HalfVT, VT)) {
3640           // The isNarrowingProfitable is to avoid regressions on PPC and
3641           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3642           // on downstream users of this. Those patterns could probably be
3643           // extended to handle extensions mixed in.
3644 
3645           SDValue SL(N0);
3646           assert(MaskBits <= Size);
3647 
3648           // Extracting the highest bit of the low half.
3649           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3650           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3651                                       N0.getOperand(0));
3652 
3653           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3654           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3655           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3656           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3657           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3658         }
3659       }
3660     }
3661   }
3662 
3663   return SDValue();
3664 }
3665 
3666 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3667                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3668                                    bool &NarrowLoad) {
3669   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
3670 
3671   if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
3672     return false;
3673 
3674   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3675   LoadedVT = LoadN->getMemoryVT();
3676 
3677   if (ExtVT == LoadedVT &&
3678       (!LegalOperations ||
3679        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3680     // ZEXTLOAD will match without needing to change the size of the value being
3681     // loaded.
3682     NarrowLoad = false;
3683     return true;
3684   }
3685 
3686   // Do not change the width of a volatile load.
3687   if (LoadN->isVolatile())
3688     return false;
3689 
3690   // Do not generate loads of non-round integer types since these can
3691   // be expensive (and would be wrong if the type is not byte sized).
3692   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3693     return false;
3694 
3695   if (LegalOperations &&
3696       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3697     return false;
3698 
3699   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3700     return false;
3701 
3702   NarrowLoad = true;
3703   return true;
3704 }
3705 
3706 SDValue DAGCombiner::visitAND(SDNode *N) {
3707   SDValue N0 = N->getOperand(0);
3708   SDValue N1 = N->getOperand(1);
3709   EVT VT = N1.getValueType();
3710 
3711   // x & x --> x
3712   if (N0 == N1)
3713     return N0;
3714 
3715   // fold vector ops
3716   if (VT.isVector()) {
3717     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3718       return FoldedVOp;
3719 
3720     // fold (and x, 0) -> 0, vector edition
3721     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3722       // do not return N0, because undef node may exist in N0
3723       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3724                              SDLoc(N), N0.getValueType());
3725     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3726       // do not return N1, because undef node may exist in N1
3727       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3728                              SDLoc(N), N1.getValueType());
3729 
3730     // fold (and x, -1) -> x, vector edition
3731     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3732       return N1;
3733     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3734       return N0;
3735   }
3736 
3737   // fold (and c1, c2) -> c1&c2
3738   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3739   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3740   if (N0C && N1C && !N1C->isOpaque())
3741     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3742   // canonicalize constant to RHS
3743   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3744      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3745     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3746   // fold (and x, -1) -> x
3747   if (isAllOnesConstant(N1))
3748     return N0;
3749   // if (and x, c) is known to be zero, return 0
3750   unsigned BitWidth = VT.getScalarSizeInBits();
3751   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3752                                    APInt::getAllOnesValue(BitWidth)))
3753     return DAG.getConstant(0, SDLoc(N), VT);
3754 
3755   if (SDValue NewSel = foldBinOpIntoSelect(N))
3756     return NewSel;
3757 
3758   // reassociate and
3759   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3760     return RAND;
3761   // fold (and (or x, C), D) -> D if (C & D) == D
3762   if (N1C && N0.getOpcode() == ISD::OR)
3763     if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
3764       if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue()))
3765         return N1;
3766   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3767   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3768     SDValue N0Op0 = N0.getOperand(0);
3769     APInt Mask = ~N1C->getAPIntValue();
3770     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
3771     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
3772       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
3773                                  N0.getValueType(), N0Op0);
3774 
3775       // Replace uses of the AND with uses of the Zero extend node.
3776       CombineTo(N, Zext);
3777 
3778       // We actually want to replace all uses of the any_extend with the
3779       // zero_extend, to avoid duplicating things.  This will later cause this
3780       // AND to be folded.
3781       CombineTo(N0.getNode(), Zext);
3782       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3783     }
3784   }
3785   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
3786   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
3787   // already be zero by virtue of the width of the base type of the load.
3788   //
3789   // the 'X' node here can either be nothing or an extract_vector_elt to catch
3790   // more cases.
3791   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3792        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
3793        N0.getOperand(0).getOpcode() == ISD::LOAD &&
3794        N0.getOperand(0).getResNo() == 0) ||
3795       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
3796     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
3797                                          N0 : N0.getOperand(0) );
3798 
3799     // Get the constant (if applicable) the zero'th operand is being ANDed with.
3800     // This can be a pure constant or a vector splat, in which case we treat the
3801     // vector as a scalar and use the splat value.
3802     APInt Constant = APInt::getNullValue(1);
3803     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
3804       Constant = C->getAPIntValue();
3805     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
3806       APInt SplatValue, SplatUndef;
3807       unsigned SplatBitSize;
3808       bool HasAnyUndefs;
3809       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
3810                                              SplatBitSize, HasAnyUndefs);
3811       if (IsSplat) {
3812         // Undef bits can contribute to a possible optimisation if set, so
3813         // set them.
3814         SplatValue |= SplatUndef;
3815 
3816         // The splat value may be something like "0x00FFFFFF", which means 0 for
3817         // the first vector value and FF for the rest, repeating. We need a mask
3818         // that will apply equally to all members of the vector, so AND all the
3819         // lanes of the constant together.
3820         EVT VT = Vector->getValueType(0);
3821         unsigned BitWidth = VT.getScalarSizeInBits();
3822 
3823         // If the splat value has been compressed to a bitlength lower
3824         // than the size of the vector lane, we need to re-expand it to
3825         // the lane size.
3826         if (BitWidth > SplatBitSize)
3827           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
3828                SplatBitSize < BitWidth;
3829                SplatBitSize = SplatBitSize * 2)
3830             SplatValue |= SplatValue.shl(SplatBitSize);
3831 
3832         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
3833         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
3834         if (SplatBitSize % BitWidth == 0) {
3835           Constant = APInt::getAllOnesValue(BitWidth);
3836           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
3837             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
3838         }
3839       }
3840     }
3841 
3842     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
3843     // actually legal and isn't going to get expanded, else this is a false
3844     // optimisation.
3845     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
3846                                                     Load->getValueType(0),
3847                                                     Load->getMemoryVT());
3848 
3849     // Resize the constant to the same size as the original memory access before
3850     // extension. If it is still the AllOnesValue then this AND is completely
3851     // unneeded.
3852     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
3853 
3854     bool B;
3855     switch (Load->getExtensionType()) {
3856     default: B = false; break;
3857     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
3858     case ISD::ZEXTLOAD:
3859     case ISD::NON_EXTLOAD: B = true; break;
3860     }
3861 
3862     if (B && Constant.isAllOnesValue()) {
3863       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
3864       // preserve semantics once we get rid of the AND.
3865       SDValue NewLoad(Load, 0);
3866 
3867       // Fold the AND away. NewLoad may get replaced immediately.
3868       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
3869 
3870       if (Load->getExtensionType() == ISD::EXTLOAD) {
3871         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
3872                               Load->getValueType(0), SDLoc(Load),
3873                               Load->getChain(), Load->getBasePtr(),
3874                               Load->getOffset(), Load->getMemoryVT(),
3875                               Load->getMemOperand());
3876         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
3877         if (Load->getNumValues() == 3) {
3878           // PRE/POST_INC loads have 3 values.
3879           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
3880                            NewLoad.getValue(2) };
3881           CombineTo(Load, To, 3, true);
3882         } else {
3883           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
3884         }
3885       }
3886 
3887       return SDValue(N, 0); // Return N so it doesn't get rechecked!
3888     }
3889   }
3890 
3891   // fold (and (load x), 255) -> (zextload x, i8)
3892   // fold (and (extload x, i16), 255) -> (zextload x, i8)
3893   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
3894   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
3895                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
3896                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
3897     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
3898     LoadSDNode *LN0 = HasAnyExt
3899       ? cast<LoadSDNode>(N0.getOperand(0))
3900       : cast<LoadSDNode>(N0);
3901     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
3902         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
3903       auto NarrowLoad = false;
3904       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
3905       EVT ExtVT, LoadedVT;
3906       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
3907                            NarrowLoad)) {
3908         if (!NarrowLoad) {
3909           SDValue NewLoad =
3910             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3911                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
3912                            LN0->getMemOperand());
3913           AddToWorklist(N);
3914           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
3915           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3916         } else {
3917           EVT PtrType = LN0->getOperand(1).getValueType();
3918 
3919           unsigned Alignment = LN0->getAlignment();
3920           SDValue NewPtr = LN0->getBasePtr();
3921 
3922           // For big endian targets, we need to add an offset to the pointer
3923           // to load the correct bytes.  For little endian systems, we merely
3924           // need to read fewer bytes from the same pointer.
3925           if (DAG.getDataLayout().isBigEndian()) {
3926             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3927             unsigned EVTStoreBytes = ExtVT.getStoreSize();
3928             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3929             SDLoc DL(LN0);
3930             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
3931                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
3932             Alignment = MinAlign(Alignment, PtrOff);
3933           }
3934 
3935           AddToWorklist(NewPtr.getNode());
3936 
3937           SDValue Load = DAG.getExtLoad(
3938               ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
3939               LN0->getPointerInfo(), ExtVT, Alignment,
3940               LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
3941           AddToWorklist(N);
3942           CombineTo(LN0, Load, Load.getValue(1));
3943           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3944         }
3945       }
3946     }
3947   }
3948 
3949   if (SDValue Combined = visitANDLike(N0, N1, N))
3950     return Combined;
3951 
3952   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
3953   if (N0.getOpcode() == N1.getOpcode())
3954     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3955       return Tmp;
3956 
3957   // Masking the negated extension of a boolean is just the zero-extended
3958   // boolean:
3959   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
3960   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
3961   //
3962   // Note: the SimplifyDemandedBits fold below can make an information-losing
3963   // transform, and then we have no way to find this better fold.
3964   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
3965     if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
3966       SDValue SubRHS = N0.getOperand(1);
3967       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
3968           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3969         return SubRHS;
3970       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
3971           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3972         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
3973     }
3974   }
3975 
3976   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
3977   // fold (and (sra)) -> (and (srl)) when possible.
3978   if (SimplifyDemandedBits(SDValue(N, 0)))
3979     return SDValue(N, 0);
3980 
3981   // fold (zext_inreg (extload x)) -> (zextload x)
3982   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
3983     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3984     EVT MemVT = LN0->getMemoryVT();
3985     // If we zero all the possible extended bits, then we can turn this into
3986     // a zextload if we are running before legalize or the operation is legal.
3987     unsigned BitWidth = N1.getScalarValueSizeInBits();
3988     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3989                            BitWidth - MemVT.getScalarSizeInBits())) &&
3990         ((!LegalOperations && !LN0->isVolatile()) ||
3991          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3992       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3993                                        LN0->getChain(), LN0->getBasePtr(),
3994                                        MemVT, LN0->getMemOperand());
3995       AddToWorklist(N);
3996       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3997       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3998     }
3999   }
4000   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4001   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4002       N0.hasOneUse()) {
4003     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4004     EVT MemVT = LN0->getMemoryVT();
4005     // If we zero all the possible extended bits, then we can turn this into
4006     // a zextload if we are running before legalize or the operation is legal.
4007     unsigned BitWidth = N1.getScalarValueSizeInBits();
4008     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4009                            BitWidth - MemVT.getScalarSizeInBits())) &&
4010         ((!LegalOperations && !LN0->isVolatile()) ||
4011          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4012       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4013                                        LN0->getChain(), LN0->getBasePtr(),
4014                                        MemVT, LN0->getMemOperand());
4015       AddToWorklist(N);
4016       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4017       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4018     }
4019   }
4020   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4021   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4022     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4023                                            N0.getOperand(1), false))
4024       return BSwap;
4025   }
4026 
4027   return SDValue();
4028 }
4029 
4030 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// N0 and N1 are the two operands of the OR in N.  If DemandHighBits is true
/// the caller needs all bits above the low halfword to end up zero, so extra
/// masking checks are performed before the transform is allowed.
/// Returns the replacement value, or an empty SDValue if no match.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // Only run after legalization so we know whether BSWAP is selectable.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  // Canonicalize so that an (and (shl ...)) lands in N0 and an
  // (and (srl ...)) lands in N1 before we start peeling off the ANDs.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  // Strip a 0xFF00 mask from N0, remembering that we saw it.
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  // Strip a 0xFF mask from N1, remembering that we saw it.
  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After mask stripping, N0 must be the left shift and N1 the right shift,
  // both by exactly 8, and each with a single use.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  // i.e. the masks may also appear inside the shifts instead of outside.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must be shuffling bytes of the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Build (bswap a); for types wider than i16 the swapped halfword lands in
  // the high bits, so shift it back down into the low halfword.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
4136 
4137 /// Return true if the specified node is an element that makes up a 32-bit
4138 /// packed halfword byteswap.
4139 /// ((x & 0x000000ff) << 8) |
4140 /// ((x & 0x0000ff00) >> 8) |
4141 /// ((x & 0x00ff0000) << 8) |
4142 /// ((x & 0xff000000) >> 8)
4143 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4144   if (!N.getNode()->hasOneUse())
4145     return false;
4146 
4147   unsigned Opc = N.getOpcode();
4148   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4149     return false;
4150 
4151   SDValue N0 = N.getOperand(0);
4152   unsigned Opc0 = N0.getOpcode();
4153   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4154     return false;
4155 
4156   ConstantSDNode *N1C = nullptr;
4157   // SHL or SRL: look upstream for AND mask operand
4158   if (Opc == ISD::AND)
4159     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4160   else if (Opc0 == ISD::AND)
4161     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4162   if (!N1C)
4163     return false;
4164 
4165   unsigned MaskByteOffset;
4166   switch (N1C->getZExtValue()) {
4167   default:
4168     return false;
4169   case 0xFF:       MaskByteOffset = 0; break;
4170   case 0xFF00:     MaskByteOffset = 1; break;
4171   case 0xFF0000:   MaskByteOffset = 2; break;
4172   case 0xFF000000: MaskByteOffset = 3; break;
4173   }
4174 
4175   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4176   if (Opc == ISD::AND) {
4177     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4178       // (x >> 8) & 0xff
4179       // (x >> 8) & 0xff0000
4180       if (Opc0 != ISD::SRL)
4181         return false;
4182       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4183       if (!C || C->getZExtValue() != 8)
4184         return false;
4185     } else {
4186       // (x << 8) & 0xff00
4187       // (x << 8) & 0xff000000
4188       if (Opc0 != ISD::SHL)
4189         return false;
4190       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4191       if (!C || C->getZExtValue() != 8)
4192         return false;
4193     }
4194   } else if (Opc == ISD::SHL) {
4195     // (x & 0xff) << 8
4196     // (x & 0xff0000) << 8
4197     if (MaskByteOffset != 0 && MaskByteOffset != 2)
4198       return false;
4199     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4200     if (!C || C->getZExtValue() != 8)
4201       return false;
4202   } else { // Opc == ISD::SRL
4203     // (x & 0xff00) >> 8
4204     // (x & 0xff000000) >> 8
4205     if (MaskByteOffset != 1 && MaskByteOffset != 3)
4206       return false;
4207     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4208     if (!C || C->getZExtValue() != 8)
4209       return false;
4210   }
4211 
4212   if (Parts[MaskByteOffset])
4213     return false;
4214 
4215   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4216   return true;
4217 }
4218 
4219 /// Match a 32-bit packed halfword bswap. That is
4220 /// ((x & 0x000000ff) << 8) |
4221 /// ((x & 0x0000ff00) >> 8) |
4222 /// ((x & 0x00ff0000) << 8) |
4223 /// ((x & 0xff000000) >> 8)
4224 /// => (rotl (bswap x), 16)
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
///
/// N0 and N1 are the operands of the OR node N.  Returns the replacement
/// value, or an empty SDValue if the pattern does not match.
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  // Only run after legalization so the BSWAP/rotate legality checks below
  // are meaningful.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // Parts[i] collects the source node claimed for byte i; all four slots
  // must be filled by isBSwapHWordElement and must agree on the source.
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordElement(N00, Parts))
      return SDValue();

    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    SDValue N10 = N1.getOperand(0);
    if (!isBSwapHWordElement(N10, Parts))
      return SDValue();
    SDValue N11 = N1.getOperand(1);
    if (!isBSwapHWordElement(N11, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do  (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
4293 
4294 /// This contains all DAGCombine rules which reduce two values combined by
4295 /// an Or operation to a single value \see visitANDLike().
4296 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4297   EVT VT = N1.getValueType();
4298   SDLoc DL(N);
4299 
4300   // fold (or x, undef) -> -1
4301   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4302     return DAG.getAllOnesConstant(DL, VT);
4303 
4304   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4305     return V;
4306 
4307   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4308   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4309       // Don't increase # computations.
4310       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4311     // We can only do this xform if we know that bits from X that are set in C2
4312     // but not in C1 are already zero.  Likewise for Y.
4313     if (const ConstantSDNode *N0O1C =
4314         getAsNonOpaqueConstant(N0.getOperand(1))) {
4315       if (const ConstantSDNode *N1O1C =
4316           getAsNonOpaqueConstant(N1.getOperand(1))) {
4317         // We can only do this xform if we know that bits from X that are set in
4318         // C2 but not in C1 are already zero.  Likewise for Y.
4319         const APInt &LHSMask = N0O1C->getAPIntValue();
4320         const APInt &RHSMask = N1O1C->getAPIntValue();
4321 
4322         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4323             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4324           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4325                                   N0.getOperand(0), N1.getOperand(0));
4326           return DAG.getNode(ISD::AND, DL, VT, X,
4327                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4328         }
4329       }
4330     }
4331   }
4332 
4333   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4334   if (N0.getOpcode() == ISD::AND &&
4335       N1.getOpcode() == ISD::AND &&
4336       N0.getOperand(0) == N1.getOperand(0) &&
4337       // Don't increase # computations.
4338       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4339     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4340                             N0.getOperand(1), N1.getOperand(1));
4341     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4342   }
4343 
4344   return SDValue();
4345 }
4346 
/// Combine an OR node: constant folding, identity/annihilator folds,
/// vector-specific folds (including merging two shuffles with zero inputs),
/// bswap recognition, rotate matching, and demanded-bits simplification.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        // Build the combined mask: each lane must be zero in exactly one of
        // the two shuffles, and the non-zero lane supplies the element.
        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          // If the mask isn't legal as-is, try the commuted form before
          // giving up.
          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0.
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) {
    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) {
        if (SDValue COR =
                DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1))
          return DAG.getNode(
              ISD::AND, SDLoc(N), VT,
              DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
        return SDValue();
      }
    }
  }

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
4512 
4513 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4514 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4515   if (Op.getOpcode() == ISD::AND) {
4516     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4517       Mask = Op.getOperand(1);
4518       Op = Op.getOperand(0);
4519     } else {
4520       return false;
4521     }
4522   }
4523 
4524   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4525     Shift = Op;
4526     return true;
4527   }
4528 
4529   return false;
4530 }
4531 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      if (NegC->getAPIntValue() == EltSize - 1) {
        // Strip the redundant mask: Neg' behaves like Neg under condition [A].
        Neg = Neg.getOperand(0);
        MaskLoBits = Log2_64(EltSize);
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      if (PosC->getAPIntValue() == EltSize - 1)
        Pos = Pos.getOperand(0);

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  //
  // Width collects the value that must equal EltSize modulo (Mask + 1).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
4634 
4635 // A subroutine of MatchRotate used once we have found an OR of two opposite
4636 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4637 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4638 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4639 // Neg with outer conversions stripped away.
4640 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4641                                        SDValue Neg, SDValue InnerPos,
4642                                        SDValue InnerNeg, unsigned PosOpcode,
4643                                        unsigned NegOpcode, const SDLoc &DL) {
4644   // fold (or (shl x, (*ext y)),
4645   //          (srl x, (*ext (sub 32, y)))) ->
4646   //   (rotl x, y) or (rotr x, (sub 32, y))
4647   //
4648   // fold (or (shl x, (*ext (sub 32, y))),
4649   //          (srl x, (*ext y))) ->
4650   //   (rotr x, y) or (rotl x, (sub 32, y))
4651   EVT VT = Shifted.getValueType();
4652   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4653     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4654     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4655                        HasPos ? Pos : Neg).getNode();
4656   }
4657 
4658   return nullptr;
4659 }
4660 
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].  Returns the rotate node, or null if no rotate was matched.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // Constant amounts form a rotate iff C1 + C2 == bitwidth.
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    // Use whichever rotate flavor the target has; the amount is taken from
    // the matching side (SHL amount for ROTL, SRL amount for ROTR).
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    // Each original mask constrained only the bits coming from its own shift;
    // the bit positions filled by the opposite shift are unconstrained, so
    // they enter the combined mask as all-ones.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try to match a variable-amount rotate in either direction, where one
  // amount is (sub C, other); see MatchRotatePosNeg / matchRotateSub.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
4768 
4769 namespace {
4770 /// Represents known origin of an individual byte in load combine pattern. The
4771 /// value of the byte is either constant zero or comes from memory.
4772 struct ByteProvider {
4773   // For constant zero providers Load is set to nullptr. For memory providers
4774   // Load represents the node which loads the byte from memory.
4775   // ByteOffset is the offset of the byte in the value produced by the load.
4776   LoadSDNode *Load;
4777   unsigned ByteOffset;
4778 
4779   ByteProvider() : Load(nullptr), ByteOffset(0) {}
4780 
4781   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
4782     return ByteProvider(Load, ByteOffset);
4783   }
4784   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
4785 
4786   bool isConstantZero() const { return !Load; }
4787   bool isMemory() const { return Load; }
4788 
4789   bool operator==(const ByteProvider &Other) const {
4790     return Other.Load == Load && Other.ByteOffset == ByteOffset;
4791   }
4792 
4793 private:
4794   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
4795       : Load(Load), ByteOffset(ByteOffset) {}
4796 };
4797 
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For all the values except the root of the expression verifies that the value
/// has exactly one use and if it's not true return None. This way if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
                                                   unsigned Depth,
                                                   bool Root = false) {
  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
  if (Depth == 10)
    return None;

  // Only the root may have multiple uses; see the function comment above.
  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    // A byte of an OR is known only when one side provides it and the other
    // side is constant zero at that position.
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    // Only shifts by a constant whole number of bytes are handled.
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    // Bytes below the shift amount are the shifted-in zeros; higher bytes
    // come from the correspondingly lower byte of the shifted operand.
    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes beyond the narrow value are zero for zext but unknown for
    // sext/anyext.
    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    // Byte Index of a bswap comes from byte ByteWidth - Index - 1 of the
    // operand.
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    if (L->isVolatile() || L->isIndexed())
      return None;

    // Only loads of a whole number of bytes are handled.
    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes beyond the loaded width are zero for a zextload but unknown
    // otherwise.
    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  return None;
}
4895 } // namespace
4896 
4897 /// Match a pattern where a wide type scalar value is loaded by several narrow
4898 /// loads and combined by shifts and ors. Fold it into a single load or a load
4899 /// and a BSWAP if the targets supports it.
4900 ///
4901 /// Assuming little endian target:
4902 ///  i8 *a = ...
4903 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4904 /// =>
4905 ///  i32 val = *((i32)a)
4906 ///
4907 ///  i8 *a = ...
4908 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4909 /// =>
4910 ///  i32 val = BSWAP(*((i32)a))
4911 ///
4912 /// TODO: This rule matches complex patterns with OR node roots and doesn't
4913 /// interact well with the worklist mechanism. When a part of the pattern is
4914 /// updated (e.g. one of the loads) its direct users are put into the worklist,
4915 /// but the root node of the pattern which triggers the load combine is not
4916 /// necessarily a direct user of the changed node. For example, once the address
4917 /// of t28 load is reassociated load combine won't be triggered:
4918 ///             t25: i32 = add t4, Constant:i32<2>
4919 ///           t26: i64 = sign_extend t25
4920 ///        t27: i64 = add t2, t26
4921 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
4922 ///     t29: i32 = zero_extend t28
4923 ///   t32: i32 = shl t29, Constant:i8<8>
4924 /// t33: i32 = or t23, t32
4925 /// As a possible fix visitLoad can check if the load can be a part of a load
4926 /// combine pattern and add corresponding OR roots to the worklist.
4927 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
4928   assert(N->getOpcode() == ISD::OR &&
4929          "Can only match load combining against OR nodes");
4930 
4931   // Handles simple types only
4932   EVT VT = N->getValueType(0);
4933   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
4934     return SDValue();
4935   unsigned ByteWidth = VT.getSizeInBits() / 8;
4936 
4937   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4938   // Before legalize we can introduce too wide illegal loads which will be later
4939   // split into legal sized loads. This enables us to combine i64 load by i8
4940   // patterns to a couple of i32 loads on 32 bit targets.
4941   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
4942     return SDValue();
4943 
4944   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
4945     unsigned BW, unsigned i) { return i; };
4946   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
4947     unsigned BW, unsigned i) { return BW - i - 1; };
4948 
4949   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
4950   auto MemoryByteOffset = [&] (ByteProvider P) {
4951     assert(P.isMemory() && "Must be a memory byte provider");
4952     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
4953     assert(LoadBitWidth % 8 == 0 &&
4954            "can only analyze providers for individual bytes not bit");
4955     unsigned LoadByteWidth = LoadBitWidth / 8;
4956     return IsBigEndianTarget
4957             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
4958             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
4959   };
4960 
4961   Optional<BaseIndexOffset> Base;
4962   SDValue Chain;
4963 
4964   SmallSet<LoadSDNode *, 8> Loads;
4965   Optional<ByteProvider> FirstByteProvider;
4966   int64_t FirstOffset = INT64_MAX;
4967 
4968   // Check if all the bytes of the OR we are looking at are loaded from the same
4969   // base address. Collect bytes offsets from Base address in ByteOffsets.
4970   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
4971   for (unsigned i = 0; i < ByteWidth; i++) {
4972     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
4973     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
4974       return SDValue();
4975 
4976     LoadSDNode *L = P->Load;
4977     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
4978            "Must be enforced by calculateByteProvider");
4979     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
4980 
4981     // All loads must share the same chain
4982     SDValue LChain = L->getChain();
4983     if (!Chain)
4984       Chain = LChain;
4985     else if (Chain != LChain)
4986       return SDValue();
4987 
4988     // Loads must share the same base address
4989     BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
4990     int64_t ByteOffsetFromBase = 0;
4991     if (!Base)
4992       Base = Ptr;
4993     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
4994       return SDValue();
4995 
4996     // Calculate the offset of the current byte from the base address
4997     ByteOffsetFromBase += MemoryByteOffset(*P);
4998     ByteOffsets[i] = ByteOffsetFromBase;
4999 
5000     // Remember the first byte load
5001     if (ByteOffsetFromBase < FirstOffset) {
5002       FirstByteProvider = P;
5003       FirstOffset = ByteOffsetFromBase;
5004     }
5005 
5006     Loads.insert(L);
5007   }
5008   assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
5009          "memory, so there must be at least one load which produces the value");
5010   assert(Base && "Base address of the accessed memory location must be set");
5011   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5012 
5013   // Check if the bytes of the OR we are looking at match with either big or
5014   // little endian value load
5015   bool BigEndian = true, LittleEndian = true;
5016   for (unsigned i = 0; i < ByteWidth; i++) {
5017     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5018     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5019     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5020     if (!BigEndian && !LittleEndian)
5021       return SDValue();
5022   }
5023   assert((BigEndian != LittleEndian) && "should be either or");
5024   assert(FirstByteProvider && "must be set");
5025 
5026   // Ensure that the first byte is loaded from zero offset of the first load.
5027   // So the combined value can be loaded from the first load address.
5028   if (MemoryByteOffset(*FirstByteProvider) != 0)
5029     return SDValue();
5030   LoadSDNode *FirstLoad = FirstByteProvider->Load;
5031 
5032   // The node we are looking at matches with the pattern, check if we can
5033   // replace it with a single load and bswap if needed.
5034 
5035   // If the load needs byte swap check if the target supports it
5036   bool NeedsBswap = IsBigEndianTarget != BigEndian;
5037 
5038   // Before legalize we can introduce illegal bswaps which will be later
5039   // converted to an explicit bswap sequence. This way we end up with a single
5040   // load and byte shuffling instead of several loads and byte shuffling.
5041   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
5042     return SDValue();
5043 
5044   // Check that a load of the wide type is both allowed and fast on the target
5045   bool Fast = false;
5046   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
5047                                         VT, FirstLoad->getAddressSpace(),
5048                                         FirstLoad->getAlignment(), &Fast);
5049   if (!Allowed || !Fast)
5050     return SDValue();
5051 
5052   SDValue NewLoad =
5053       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
5054                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
5055 
5056   // Transfer chain users from old loads to the new load.
5057   for (LoadSDNode *L : Loads)
5058     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
5059 
5060   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
5061 }
5062 
/// Try all XOR-specific combines on node N. Returns the replacement value, or
/// an empty SDValue if no fold applied.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    // Only rewrite if the inverted condition code is usable on this target
    // (always fine before operation legalization).
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL(N0);
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
                    DAG.getConstant(1, DL, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      // De Morgan: swap AND <-> OR and push the NOT onto both operands.
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), DL, VT));
    }
    if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), DL, VT));
    }
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
      N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
      TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    // The SRA must broadcast the sign bit (shift by bitwidth-1) for this to
    // be the classic branchless-abs idiom.
    if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
      if (C->getAPIntValue() == (OpSizeInBits - 1))
        return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
5239 
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// Attempts to commute a shift with its binop operand, rewriting
/// (shift (binop X, C1), Amt) as (binop (shift X, Amt), (shift C1, Amt)).
/// Returns the replacement value, or a null SDValue when the transform is
/// unsafe or not profitable.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  // The binop must become dead after the transform; otherwise we only grow
  // the DAG.
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a
  // constant or is a copy/select. Enable it for other cases once we know
  // where it is exactly profitable.
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
                 BinOpLHSVal->getOpcode() == ISD::SRA ||
                 BinOpLHSVal->getOpcode() == ISD::SRL;
  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal->getOpcode() == ISD::SELECT;

  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
      !isCopyOrSelect)
    return SDValue();

  // NOTE(review): for copy/select inputs the transform only proceeds when N
  // has multiple uses — confirm this asymmetry is intentional.
  if (isCopyOrSelect && N->hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The HighBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  // Give the target a chance to veto the commute (e.g. to preserve patterns
  // it matches during instruction selection).
  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
5317 
5318 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5319   assert(N->getOpcode() == ISD::TRUNCATE);
5320   assert(N->getOperand(0).getOpcode() == ISD::AND);
5321 
5322   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5323   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5324     SDValue N01 = N->getOperand(0).getOperand(1);
5325     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5326       SDLoc DL(N);
5327       EVT TruncVT = N->getValueType(0);
5328       SDValue N00 = N->getOperand(0).getOperand(0);
5329       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5330       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5331       AddToWorklist(Trunc00.getNode());
5332       AddToWorklist(Trunc01.getNode());
5333       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5334     }
5335   }
5336 
5337   return SDValue();
5338 }
5339 
/// Combine ROTL/ROTR nodes: strip zero rotates, reduce over-wide rotate
/// amounts, distribute truncates through AND-masked amounts, and merge
/// rotate-of-rotate with constant amounts.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullConstantOrNullSplatConstant(N1))
    return N0;

  // fold (rot x, c) -> (rot x, c % BitSize)
  if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    if (Cst->getAPIntValue().uge(Bitsize)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
      return DAG.getNode(N->getOpcode(), dl, VT, N0,
                         DAG.getConstant(RotAmt, dl, N1.getValueType()));
    }
  }

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      // Same-direction rotates add their amounts; opposite directions
      // cancel, so subtract.
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift =
              DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        // Normalize the combined amount into [.., Bitsize).
        // NOTE(review): SREM of a negative combined amount (possible in the
        // opposite-direction case) yields a negative remainder — confirm all
        // targets interpret such rotate amounts modulo the bitwidth.
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
            BitsizeC.getNode());
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}
5389 
/// Combine SHL nodes: constant folding, degenerate cases (shift of/by zero,
/// over-wide amounts), merging with inner shifts/extends, and distribution
/// over add/or/mul. Folds are tried in order; the first match wins.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        // Safe only when the setcc's true value is all-ones, so shifting the
        // mask commutes with the AND.
        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  // Splat-aware constant shift amount, used by most of the folds below.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstantOrNullSplatConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    // Widen both amounts by one overflow bit so c1 + c2 cannot wrap.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // OpSizeInBits - InnerShiftSize is the number of bits the ext adds;
      // the outer amount must cover at least that many.
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N0);
        APInt Sum = c1 + c2;
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        // Equal amounts make the shl/srl pair a narrow mask; keep it narrow
        // and zext afterwards.
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  // The 'exact' flag guarantees no bits were shifted out by the inner shift.
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask covers the bits that survive the srl; shift it to track the
        // net direction of the combined shifts.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    // Only commit if the shifted constant actually folded to a constant.
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}
5627 
/// Combine SRA nodes: constant folding, sign-bit reasoning, sext_inreg
/// formation, merging with inner shifts, and truncate-based narrowing.
/// Folds are tried in order; the first match wins.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Splat-aware constant shift amount, used by most of the folds below.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1 >> c2 (arithmetic)
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
  // fold (sra x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();

    // Widen both amounts by one overflow bit so c1 + c2 cannot wrap.
    // An out-of-range total saturates to a shift by OpSizeInBits - 1 (all
    // result bits become copies of the sign bit).
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
                         DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        // A logical shift suffices: the high bits it produces are discarded
        // by the truncate, and the sign is restored by the sign_extend.
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
5796 
5797 SDValue DAGCombiner::visitSRL(SDNode *N) {
5798   SDValue N0 = N->getOperand(0);
5799   SDValue N1 = N->getOperand(1);
5800   EVT VT = N0.getValueType();
5801   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5802 
5803   // fold vector ops
5804   if (VT.isVector())
5805     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5806       return FoldedVOp;
5807 
5808   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5809 
5810   // fold (srl c1, c2) -> c1 >>u c2
5811   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5812   if (N0C && N1C && !N1C->isOpaque())
5813     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
5814   // fold (srl 0, x) -> 0
5815   if (isNullConstantOrNullSplatConstant(N0))
5816     return N0;
5817   // fold (srl x, c >= size(x)) -> undef
5818   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5819   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
5820     return Val->getAPIntValue().uge(OpSizeInBits);
5821   };
5822   if (matchUnaryPredicate(N1, MatchShiftTooBig))
5823     return DAG.getUNDEF(VT);
5824   // fold (srl x, 0) -> x
5825   if (N1C && N1C->isNullValue())
5826     return N0;
5827 
5828   if (SDValue NewSel = foldBinOpIntoSelect(N))
5829     return NewSel;
5830 
5831   // if (srl x, c) is known to be zero, return 0
5832   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5833                                    APInt::getAllOnesValue(OpSizeInBits)))
5834     return DAG.getConstant(0, SDLoc(N), VT);
5835 
5836   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
5837   if (N0.getOpcode() == ISD::SRL) {
5838     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5839                                           ConstantSDNode *RHS) {
5840       APInt c1 = LHS->getAPIntValue();
5841       APInt c2 = RHS->getAPIntValue();
5842       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5843       return (c1 + c2).uge(OpSizeInBits);
5844     };
5845     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5846       return DAG.getConstant(0, SDLoc(N), VT);
5847 
5848     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5849                                        ConstantSDNode *RHS) {
5850       APInt c1 = LHS->getAPIntValue();
5851       APInt c2 = RHS->getAPIntValue();
5852       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5853       return (c1 + c2).ult(OpSizeInBits);
5854     };
5855     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5856       SDLoc DL(N);
5857       EVT ShiftVT = N1.getValueType();
5858       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5859       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
5860     }
5861   }
5862 
5863   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
5864   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
5865       N0.getOperand(0).getOpcode() == ISD::SRL) {
5866     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
5867       uint64_t c1 = N001C->getZExtValue();
5868       uint64_t c2 = N1C->getZExtValue();
5869       EVT InnerShiftVT = N0.getOperand(0).getValueType();
5870       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
5871       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5872       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
5873       if (c1 + OpSizeInBits == InnerShiftSize) {
5874         SDLoc DL(N0);
5875         if (c1 + c2 >= InnerShiftSize)
5876           return DAG.getConstant(0, DL, VT);
5877         return DAG.getNode(ISD::TRUNCATE, DL, VT,
5878                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
5879                                        N0.getOperand(0).getOperand(0),
5880                                        DAG.getConstant(c1 + c2, DL,
5881                                                        ShiftCountVT)));
5882       }
5883     }
5884   }
5885 
5886   // fold (srl (shl x, c), c) -> (and x, cst2)
5887   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
5888       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
5889     SDLoc DL(N);
5890     SDValue Mask =
5891         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
5892     AddToWorklist(Mask.getNode());
5893     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
5894   }
5895 
5896   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
5897   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5898     // Shifting in all undef bits?
5899     EVT SmallVT = N0.getOperand(0).getValueType();
5900     unsigned BitSize = SmallVT.getScalarSizeInBits();
5901     if (N1C->getZExtValue() >= BitSize)
5902       return DAG.getUNDEF(VT);
5903 
5904     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
5905       uint64_t ShiftAmt = N1C->getZExtValue();
5906       SDLoc DL0(N0);
5907       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
5908                                        N0.getOperand(0),
5909                           DAG.getConstant(ShiftAmt, DL0,
5910                                           getShiftAmountTy(SmallVT)));
5911       AddToWorklist(SmallShift.getNode());
5912       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
5913       SDLoc DL(N);
5914       return DAG.getNode(ISD::AND, DL, VT,
5915                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
5916                          DAG.getConstant(Mask, DL, VT));
5917     }
5918   }
5919 
5920   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
5921   // bit, which is unmodified by sra.
5922   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
5923     if (N0.getOpcode() == ISD::SRA)
5924       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
5925   }
5926 
5927   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
5928   if (N1C && N0.getOpcode() == ISD::CTLZ &&
5929       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
5930     KnownBits Known;
5931     DAG.computeKnownBits(N0.getOperand(0), Known);
5932 
5933     // If any of the input bits are KnownOne, then the input couldn't be all
5934     // zeros, thus the result of the srl will always be zero.
5935     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
5936 
5937     // If all of the bits input the to ctlz node are known to be zero, then
5938     // the result of the ctlz is "32" and the result of the shift is one.
5939     APInt UnknownBits = ~Known.Zero;
5940     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
5941 
5942     // Otherwise, check to see if there is exactly one bit input to the ctlz.
5943     if (UnknownBits.isPowerOf2()) {
5944       // Okay, we know that only that the single bit specified by UnknownBits
5945       // could be set on input to the CTLZ node. If this bit is set, the SRL
5946       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
5947       // to an SRL/XOR pair, which is likely to simplify more.
5948       unsigned ShAmt = UnknownBits.countTrailingZeros();
5949       SDValue Op = N0.getOperand(0);
5950 
5951       if (ShAmt) {
5952         SDLoc DL(N0);
5953         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5954                   DAG.getConstant(ShAmt, DL,
5955                                   getShiftAmountTy(Op.getValueType())));
5956         AddToWorklist(Op.getNode());
5957       }
5958 
5959       SDLoc DL(N);
5960       return DAG.getNode(ISD::XOR, DL, VT,
5961                          Op, DAG.getConstant(1, DL, VT));
5962     }
5963   }
5964 
5965   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
5966   if (N1.getOpcode() == ISD::TRUNCATE &&
5967       N1.getOperand(0).getOpcode() == ISD::AND) {
5968     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5969       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
5970   }
5971 
5972   // fold operands of srl based on knowledge that the low bits are not
5973   // demanded.
5974   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5975     return SDValue(N, 0);
5976 
5977   if (N1C && !N1C->isOpaque())
5978     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
5979       return NewSRL;
5980 
5981   // Attempt to convert a srl of a load into a narrower zero-extending load.
5982   if (SDValue NarrowLoad = ReduceLoadWidth(N))
5983     return NarrowLoad;
5984 
5985   // Here is a common situation. We want to optimize:
5986   //
5987   //   %a = ...
5988   //   %b = and i32 %a, 2
5989   //   %c = srl i32 %b, 1
5990   //   brcond i32 %c ...
5991   //
5992   // into
5993   //
5994   //   %a = ...
5995   //   %b = and %a, 2
5996   //   %c = setcc eq %b, 0
5997   //   brcond %c ...
5998   //
5999   // However when after the source operand of SRL is optimized into AND, the SRL
6000   // itself may not be optimized further. Look for it and add the BRCOND into
6001   // the worklist.
6002   if (N->hasOneUse()) {
6003     SDNode *Use = *N->use_begin();
6004     if (Use->getOpcode() == ISD::BRCOND)
6005       AddToWorklist(Use);
6006     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
6007       // Also look pass the truncate.
6008       Use = *Use->use_begin();
6009       if (Use->getOpcode() == ISD::BRCOND)
6010         AddToWorklist(Use);
6011     }
6012   }
6013 
6014   return SDValue();
6015 }
6016 
6017 SDValue DAGCombiner::visitABS(SDNode *N) {
6018   SDValue N0 = N->getOperand(0);
6019   EVT VT = N->getValueType(0);
6020 
6021   // fold (abs c1) -> c2
6022   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6023     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
6024   // fold (abs (abs x)) -> (abs x)
6025   if (N0.getOpcode() == ISD::ABS)
6026     return N0;
6027   // fold (abs x) -> x iff not-negative
6028   if (DAG.SignBitIsZero(N0))
6029     return N0;
6030   return SDValue();
6031 }
6032 
6033 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
6034   SDValue N0 = N->getOperand(0);
6035   EVT VT = N->getValueType(0);
6036 
6037   // fold (bswap c1) -> c2
6038   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6039     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
6040   // fold (bswap (bswap x)) -> x
6041   if (N0.getOpcode() == ISD::BSWAP)
6042     return N0->getOperand(0);
6043   return SDValue();
6044 }
6045 
6046 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
6047   SDValue N0 = N->getOperand(0);
6048   EVT VT = N->getValueType(0);
6049 
6050   // fold (bitreverse c1) -> c2
6051   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6052     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
6053   // fold (bitreverse (bitreverse x)) -> x
6054   if (N0.getOpcode() == ISD::BITREVERSE)
6055     return N0.getOperand(0);
6056   return SDValue();
6057 }
6058 
6059 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
6060   SDValue N0 = N->getOperand(0);
6061   EVT VT = N->getValueType(0);
6062 
6063   // fold (ctlz c1) -> c2
6064   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6065     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
6066   return SDValue();
6067 }
6068 
6069 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
6070   SDValue N0 = N->getOperand(0);
6071   EVT VT = N->getValueType(0);
6072 
6073   // fold (ctlz_zero_undef c1) -> c2
6074   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6075     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6076   return SDValue();
6077 }
6078 
6079 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
6080   SDValue N0 = N->getOperand(0);
6081   EVT VT = N->getValueType(0);
6082 
6083   // fold (cttz c1) -> c2
6084   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6085     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
6086   return SDValue();
6087 }
6088 
6089 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
6090   SDValue N0 = N->getOperand(0);
6091   EVT VT = N->getValueType(0);
6092 
6093   // fold (cttz_zero_undef c1) -> c2
6094   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6095     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6096   return SDValue();
6097 }
6098 
6099 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
6100   SDValue N0 = N->getOperand(0);
6101   EVT VT = N->getValueType(0);
6102 
6103   // fold (ctpop c1) -> c2
6104   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6105     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
6106   return SDValue();
6107 }
6108 
6109 
6110 /// \brief Generate Min/Max node
6111 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
6112                                    SDValue RHS, SDValue True, SDValue False,
6113                                    ISD::CondCode CC, const TargetLowering &TLI,
6114                                    SelectionDAG &DAG) {
6115   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
6116     return SDValue();
6117 
6118   switch (CC) {
6119   case ISD::SETOLT:
6120   case ISD::SETOLE:
6121   case ISD::SETLT:
6122   case ISD::SETLE:
6123   case ISD::SETULT:
6124   case ISD::SETULE: {
6125     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
6126     if (TLI.isOperationLegal(Opcode, VT))
6127       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6128     return SDValue();
6129   }
6130   case ISD::SETOGT:
6131   case ISD::SETOGE:
6132   case ISD::SETGT:
6133   case ISD::SETGE:
6134   case ISD::SETUGT:
6135   case ISD::SETUGE: {
6136     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
6137     if (TLI.isOperationLegal(Opcode, VT))
6138       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6139     return SDValue();
6140   }
6141   default:
6142     return SDValue();
6143   }
6144 }
6145 
/// Try to simplify a select whose true and false operands are both integer
/// constants into boolean math on the condition (zext/sext/xor/add).
/// Returns an empty SDValue if no transform applies.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  // Only integer-typed selects are handled here.
  if (!VT.isInteger())
    return SDValue();

  // Both select arms must be scalar integer constants.
  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // For any constants that differ by 1, we can transform the select into an
    // extend and add. Use a target hook because some targets may prefer to
    // transform in the other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        // (sext of an i1 true is -1, so -1 + (C1+1) == C1).
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(false, true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(false, false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}
6239 
/// Combine a SELECT node: constant-condition folding, i1 boolean algebra,
/// select-of-constants math, select-of-select normalization, and
/// setcc-driven folds (fmin/fmax, select_cc).
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;

  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ? N1 : N2;
  }

  // The i1 boolean folds below require the condition, result, and operand
  // types to all be i1.
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      // use_empty() is false when the inner select already existed in the
      // DAG, in which case the transform is free.
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  // select (xor Cond, 1), X, Y -> select Cond, Y, X
  if (VT0 == MVT::i1) {
    if (N0->getOpcode() == ISD::XOR) {
      if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
        SDValue Cond0 = N0->getOperand(0);
        // xor with 1 on an i1 is a logical not, so swap the select arms.
        if (C->isOne())
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
      }
    }
  }

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      if (SDValue FMinMax = combineMinNumMaxNum(
              DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    // Prefer SELECT_CC when the target supports it (or before legalization
    // when it is at least custom-lowered).
    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
                         N0.getOperand(1), N1, N2, N0.getOperand(2));
    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
6398 
6399 static
6400 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
6401   SDLoc DL(N);
6402   EVT LoVT, HiVT;
6403   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
6404 
6405   // Split the inputs.
6406   SDValue Lo, Hi, LL, LH, RL, RH;
6407   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
6408   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
6409 
6410   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
6411   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
6412 
6413   return std::make_pair(Lo, Hi);
6414 }
6415 
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
// If each half of the condition BV is a single (repeated) constant, the
// vselect can be rewritten as a CONCAT_VECTORS that picks whole halves
// from LHS/RHS directly.
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
  ConstantSDNode *BottomHalf = nullptr;
  for (int i = 0; i < NumElems / 2; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (BottomHalf == nullptr)
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    // Pointer-identity compare: build-vector constants are uniqued in the
    // DAG, so a different node means a different constant value.
    else if (Cond->getOperand(i).getNode() != BottomHalf)
      return SDValue();
  }

  // Do the same for the second half of the BuildVector
  ConstantSDNode *TopHalf = nullptr;
  for (int i = NumElems / 2; i < NumElems; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (TopHalf == nullptr)
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != TopHalf)
      return SDValue();
  }

  assert(TopHalf && BottomHalf &&
         "One half of the selector was all UNDEFs and the other was all the "
         "same value. This should have been addressed before this function.");
  // A zero selector element picks RHS, non-zero picks LHS, per VSELECT
  // semantics.
  return DAG.getNode(
      ISD::CONCAT_VECTORS, DL, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
6470 
/// Split a masked scatter whose data type requires splitting when its mask is
/// produced by a SETCC, so the SETCC is split too rather than scalarized by
/// the type legalizer.
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {

  // Only worthwhile before type legalization.
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Data  = MSC->getValue();
  SDLoc DL(N);

  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();
  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  // The base pointer is shared; only the per-lane indices are split.
  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  // NOTE(review): both half-scatters reuse this single MMO built from the
  // low half's store size — presumably acceptable for scattered accesses;
  // confirm against other splitters.
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MSC->getPointerInfo(),
                          MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
                          Alignment, MSC->getAAInfo(), MSC->getRanges());

  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
  Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
                            DL, OpsLo, MMO);

  SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
  Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
                            DL, OpsHi, MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Join the two half-scatter chains.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
6531 
6532 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
6533 
6534   if (Level >= AfterLegalizeTypes)
6535     return SDValue();
6536 
6537   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
6538   SDValue Mask = MST->getMask();
6539   SDValue Data  = MST->getValue();
6540   EVT VT = Data.getValueType();
6541   SDLoc DL(N);
6542 
6543   // If the MSTORE data type requires splitting and the mask is provided by a
6544   // SETCC, then split both nodes and its operands before legalization. This
6545   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6546   // and enables future optimizations (e.g. min/max pattern matching on X86).
6547   if (Mask.getOpcode() == ISD::SETCC) {
6548 
6549     // Check if any splitting is required.
6550     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6551         TargetLowering::TypeSplitVector)
6552       return SDValue();
6553 
6554     SDValue MaskLo, MaskHi, Lo, Hi;
6555     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6556 
6557     SDValue Chain = MST->getChain();
6558     SDValue Ptr   = MST->getBasePtr();
6559 
6560     EVT MemoryVT = MST->getMemoryVT();
6561     unsigned Alignment = MST->getOriginalAlignment();
6562 
6563     // if Alignment is equal to the vector size,
6564     // take the half of it for the second part
6565     unsigned SecondHalfAlignment =
6566       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
6567 
6568     EVT LoMemVT, HiMemVT;
6569     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6570 
6571     SDValue DataLo, DataHi;
6572     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6573 
6574     MachineMemOperand *MMO = DAG.getMachineFunction().
6575       getMachineMemOperand(MST->getPointerInfo(),
6576                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6577                            Alignment, MST->getAAInfo(), MST->getRanges());
6578 
6579     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
6580                             MST->isTruncatingStore(),
6581                             MST->isCompressingStore());
6582 
6583     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6584                                      MST->isCompressingStore());
6585 
6586     MMO = DAG.getMachineFunction().
6587       getMachineMemOperand(MST->getPointerInfo(),
6588                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
6589                            SecondHalfAlignment, MST->getAAInfo(),
6590                            MST->getRanges());
6591 
6592     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
6593                             MST->isTruncatingStore(),
6594                             MST->isCompressingStore());
6595 
6596     AddToWorklist(Lo.getNode());
6597     AddToWorklist(Hi.getNode());
6598 
6599     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6600   }
6601   return SDValue();
6602 }
6603 
6604 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
6605 
6606   if (Level >= AfterLegalizeTypes)
6607     return SDValue();
6608 
6609   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
6610   SDValue Mask = MGT->getMask();
6611   SDLoc DL(N);
6612 
6613   // If the MGATHER result requires splitting and the mask is provided by a
6614   // SETCC, then split both nodes and its operands before legalization. This
6615   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6616   // and enables future optimizations (e.g. min/max pattern matching on X86).
6617 
6618   if (Mask.getOpcode() != ISD::SETCC)
6619     return SDValue();
6620 
6621   EVT VT = N->getValueType(0);
6622 
6623   // Check if any splitting is required.
6624   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6625       TargetLowering::TypeSplitVector)
6626     return SDValue();
6627 
6628   SDValue MaskLo, MaskHi, Lo, Hi;
6629   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6630 
6631   SDValue Src0 = MGT->getValue();
6632   SDValue Src0Lo, Src0Hi;
6633   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6634 
6635   EVT LoVT, HiVT;
6636   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
6637 
6638   SDValue Chain = MGT->getChain();
6639   EVT MemoryVT = MGT->getMemoryVT();
6640   unsigned Alignment = MGT->getOriginalAlignment();
6641 
6642   EVT LoMemVT, HiMemVT;
6643   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6644 
6645   SDValue BasePtr = MGT->getBasePtr();
6646   SDValue Index = MGT->getIndex();
6647   SDValue IndexLo, IndexHi;
6648   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
6649 
6650   MachineMemOperand *MMO = DAG.getMachineFunction().
6651     getMachineMemOperand(MGT->getPointerInfo(),
6652                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6653                           Alignment, MGT->getAAInfo(), MGT->getRanges());
6654 
6655   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
6656   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
6657                             MMO);
6658 
6659   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
6660   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
6661                             MMO);
6662 
6663   AddToWorklist(Lo.getNode());
6664   AddToWorklist(Hi.getNode());
6665 
6666   // Build a factor node to remember that this load is independent of the
6667   // other one.
6668   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6669                       Hi.getValue(1));
6670 
6671   // Legalized the chain result - switch anything that used the old chain to
6672   // use the new one.
6673   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
6674 
6675   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6676 
6677   SDValue RetOps[] = { GatherRes, Chain };
6678   return DAG.getMergeValues(RetOps, DL);
6679 }
6680 
6681 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
6682 
6683   if (Level >= AfterLegalizeTypes)
6684     return SDValue();
6685 
6686   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
6687   SDValue Mask = MLD->getMask();
6688   SDLoc DL(N);
6689 
6690   // If the MLOAD result requires splitting and the mask is provided by a
6691   // SETCC, then split both nodes and its operands before legalization. This
6692   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6693   // and enables future optimizations (e.g. min/max pattern matching on X86).
6694 
6695   if (Mask.getOpcode() == ISD::SETCC) {
6696     EVT VT = N->getValueType(0);
6697 
6698     // Check if any splitting is required.
6699     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6700         TargetLowering::TypeSplitVector)
6701       return SDValue();
6702 
6703     SDValue MaskLo, MaskHi, Lo, Hi;
6704     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6705 
6706     SDValue Src0 = MLD->getSrc0();
6707     SDValue Src0Lo, Src0Hi;
6708     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6709 
6710     EVT LoVT, HiVT;
6711     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
6712 
6713     SDValue Chain = MLD->getChain();
6714     SDValue Ptr   = MLD->getBasePtr();
6715     EVT MemoryVT = MLD->getMemoryVT();
6716     unsigned Alignment = MLD->getOriginalAlignment();
6717 
6718     // if Alignment is equal to the vector size,
6719     // take the half of it for the second part
6720     unsigned SecondHalfAlignment =
6721       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
6722          Alignment/2 : Alignment;
6723 
6724     EVT LoMemVT, HiMemVT;
6725     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6726 
6727     MachineMemOperand *MMO = DAG.getMachineFunction().
6728     getMachineMemOperand(MLD->getPointerInfo(),
6729                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6730                          Alignment, MLD->getAAInfo(), MLD->getRanges());
6731 
6732     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
6733                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6734 
6735     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6736                                      MLD->isExpandingLoad());
6737 
6738     MMO = DAG.getMachineFunction().
6739     getMachineMemOperand(MLD->getPointerInfo(),
6740                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
6741                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
6742 
6743     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
6744                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6745 
6746     AddToWorklist(Lo.getNode());
6747     AddToWorklist(Hi.getNode());
6748 
6749     // Build a factor node to remember that this load is independent of the
6750     // other one.
6751     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6752                         Hi.getValue(1));
6753 
6754     // Legalized the chain result - switch anything that used the old chain to
6755     // use the new one.
6756     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
6757 
6758     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6759 
6760     SDValue RetOps[] = { LoadRes, Chain };
6761     return DAG.getMergeValues(RetOps, DL);
6762   }
6763   return SDValue();
6764 }
6765 
6766 /// A vector select of 2 constant vectors can be simplified to math/logic to
6767 /// avoid a variable select instruction and possibly avoid constant loads.
6768 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
6769   SDValue Cond = N->getOperand(0);
6770   SDValue N1 = N->getOperand(1);
6771   SDValue N2 = N->getOperand(2);
6772   EVT VT = N->getValueType(0);
6773   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
6774       !TLI.convertSelectOfConstantsToMath(VT) ||
6775       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
6776       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
6777     return SDValue();
6778 
6779   // Check if we can use the condition value to increment/decrement a single
6780   // constant value. This simplifies a select to an add and removes a constant
6781   // load/materialization from the general case.
6782   bool AllAddOne = true;
6783   bool AllSubOne = true;
6784   unsigned Elts = VT.getVectorNumElements();
6785   for (unsigned i = 0; i != Elts; ++i) {
6786     SDValue N1Elt = N1.getOperand(i);
6787     SDValue N2Elt = N2.getOperand(i);
6788     if (N1Elt.isUndef() || N2Elt.isUndef())
6789       continue;
6790 
6791     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
6792     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
6793     if (C1 != C2 + 1)
6794       AllAddOne = false;
6795     if (C1 != C2 - 1)
6796       AllSubOne = false;
6797   }
6798 
6799   // Further simplifications for the extra-special cases where the constants are
6800   // all 0 or all -1 should be implemented as folds of these patterns.
6801   SDLoc DL(N);
6802   if (AllAddOne || AllSubOne) {
6803     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
6804     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
6805     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
6806     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
6807     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
6808   }
6809 
6810   // The general case for select-of-constants:
6811   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
6812   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
6813   // leave that to a machine-specific pass.
6814   return SDValue();
6815 }
6816 
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  // Operands: N0 = condition vector, N1 = true value, N2 = false value.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDLoc DL(N);

  // fold (vselect C, X, X) -> X
  if (N1 == N2)
    return N1;

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // Match "X >[=] 0 (or X > -1) ? X : 0 - X" ...
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    // ... or the mirrored "X <[=] 0 ? 0 - X : X" form.
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      // Prefer a native ABS node when the target supports it.
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      // Otherwise expand: Y = sra(X, bits-1); abs(X) = xor(add(X, Y), Y).
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  // Try to turn a vselect of two constant vectors into math/logic.
  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}
6886 
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  // select_cc operands: (lhs, rhs, true-val, false-val, cond-code).
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
                                  CC, SDLoc(N), false)) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent the DAG creation, no setcc node is created in this case
      return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc using the simplified comparison operands.
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                         SCC.getOperand(2));
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}
6928 
6929 SDValue DAGCombiner::visitSETCC(SDNode *N) {
6930   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
6931                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
6932                        SDLoc(N));
6933 }
6934 
6935 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
6936   SDValue LHS = N->getOperand(0);
6937   SDValue RHS = N->getOperand(1);
6938   SDValue Carry = N->getOperand(2);
6939   SDValue Cond = N->getOperand(3);
6940 
6941   // If Carry is false, fold to a regular SETCC.
6942   if (Carry.getOpcode() == ISD::CARRY_FALSE)
6943     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6944 
6945   return SDValue();
6946 }
6947 
6948 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
6949   SDValue LHS = N->getOperand(0);
6950   SDValue RHS = N->getOperand(1);
6951   SDValue Carry = N->getOperand(2);
6952   SDValue Cond = N->getOperand(3);
6953 
6954   // If Carry is false, fold to a regular SETCC.
6955   if (isNullConstant(Carry))
6956     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6957 
6958   return SDValue();
6959 }
6960 
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes,
                                         bool LegalOperations) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
         Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
         && "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  // Scalar case: re-emitting the extend of a constant through getNode is
  // expected to constant-fold it.
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();

  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
  // Vector case: only fold before operation legalization and when the result
  // scalar type is legal (or types aren't legalized yet), to avoid creating
  // an illegal build_vector.
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() &&
      (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return nullptr;

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();  // destination scalar width
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();  // source width
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();
  SDLoc DL(N);

  for (unsigned i=0; i != NumElts; ++i) {
    SDValue Op = N0->getOperand(i);
    if (Op->isUndef()) {
      Elts.push_back(DAG.getUNDEF(SVT));
      continue;
    }

    // Intentionally shadows the function-level DL so each folded element
    // carries the debug location of its original build_vector operand.
    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts).getNode();
}
7020 
7021 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
7022 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
7023 // transformation. Returns true if extension are possible and the above
7024 // mentioned transformation is profitable.
7025 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
7026                                     unsigned ExtOpc,
7027                                     SmallVectorImpl<SDNode *> &ExtendNodes,
7028                                     const TargetLowering &TLI) {
7029   bool HasCopyToRegUses = false;
7030   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
7031   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
7032                             UE = N0.getNode()->use_end();
7033        UI != UE; ++UI) {
7034     SDNode *User = *UI;
7035     if (User == N)
7036       continue;
7037     if (UI.getUse().getResNo() != N0.getResNo())
7038       continue;
7039     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
7040     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
7041       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
7042       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
7043         // Sign bits will be lost after a zext.
7044         return false;
7045       bool Add = false;
7046       for (unsigned i = 0; i != 2; ++i) {
7047         SDValue UseOp = User->getOperand(i);
7048         if (UseOp == N0)
7049           continue;
7050         if (!isa<ConstantSDNode>(UseOp))
7051           return false;
7052         Add = true;
7053       }
7054       if (Add)
7055         ExtendNodes.push_back(User);
7056       continue;
7057     }
7058     // If truncates aren't free and there are users we can't
7059     // extend, it isn't worthwhile.
7060     if (!isTruncFree)
7061       return false;
7062     // Remember if this value is live-out.
7063     if (User->getOpcode() == ISD::CopyToReg)
7064       HasCopyToRegUses = true;
7065   }
7066 
7067   if (HasCopyToRegUses) {
7068     bool BothLiveOut = false;
7069     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
7070          UI != UE; ++UI) {
7071       SDUse &Use = UI.getUse();
7072       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
7073         BothLiveOut = true;
7074         break;
7075       }
7076     }
7077     if (BothLiveOut)
7078       // Both unextended and extended values are live out. There had better be
7079       // a good reason for the transformation.
7080       return ExtendNodes.size();
7081   }
7082   return true;
7083 }
7084 
7085 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
7086                                   SDValue Trunc, SDValue ExtLoad,
7087                                   const SDLoc &DL, ISD::NodeType ExtType) {
7088   // Extend SetCC uses if necessary.
7089   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
7090     SDNode *SetCC = SetCCs[i];
7091     SmallVector<SDValue, 4> Ops;
7092 
7093     for (unsigned j = 0; j != 2; ++j) {
7094       SDValue SOp = SetCC->getOperand(j);
7095       if (SOp == Trunc)
7096         Ops.push_back(ExtLoad);
7097       else
7098         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
7099     }
7100 
7101     Ops.push_back(SetCC->getOperand(2));
7102     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
7103   }
7104 }
7105 
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Require a plain, unindexed, non-volatile, single-use load and a
  // power-of-2 vector destination the target actually wants split.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // All other users of the load must be extendable for this to pay off.
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  // Halve both src and dst types until the extload is legal/custom (or we
  // run out of elements to split).
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  // Stride is the in-memory size of each split piece.
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  // Emit one extending load per split piece, advancing the pointer by Stride
  // each time and adjusting alignment for the offset.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Join the split chains and concatenate the split values.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  CombineTo(N0.getNode(), Trunc, NewChain);
  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
                  (ISD::NodeType)N->getOpcode());
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
7203 
7204 /// If we're narrowing or widening the result of a vector select and the final
7205 /// size is the same size as a setcc (compare) feeding the select, then try to
7206 /// apply the cast operation to the select's operands because matching vector
7207 /// sizes for a select condition and other operands should be more efficient.
7208 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
7209   unsigned CastOpcode = Cast->getOpcode();
7210   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
7211           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
7212           CastOpcode == ISD::FP_ROUND) &&
7213          "Unexpected opcode for vector select narrowing/widening");
7214 
7215   // We only do this transform before legal ops because the pattern may be
7216   // obfuscated by target-specific operations after legalization. Do not create
7217   // an illegal select op, however, because that may be difficult to lower.
7218   EVT VT = Cast->getValueType(0);
7219   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
7220     return SDValue();
7221 
7222   SDValue VSel = Cast->getOperand(0);
7223   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
7224       VSel.getOperand(0).getOpcode() != ISD::SETCC)
7225     return SDValue();
7226 
7227   // Does the setcc have the same vector size as the casted select?
7228   SDValue SetCC = VSel.getOperand(0);
7229   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
7230   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
7231     return SDValue();
7232 
7233   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
7234   SDValue A = VSel.getOperand(1);
7235   SDValue B = VSel.getOperand(2);
7236   SDValue CastA, CastB;
7237   SDLoc DL(Cast);
7238   if (CastOpcode == ISD::FP_ROUND) {
7239     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
7240     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
7241     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
7242   } else {
7243     CastA = DAG.getNode(CastOpcode, DL, VT, A);
7244     CastB = DAG.getNode(CastOpcode, DL, VT, B);
7245   }
7246   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
7247 }
7248 
7249 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
7250   SDValue N0 = N->getOperand(0);
7251   EVT VT = N->getValueType(0);
7252   SDLoc DL(N);
7253 
7254   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7255                                               LegalOperations))
7256     return SDValue(Res, 0);
7257 
7258   // fold (sext (sext x)) -> (sext x)
7259   // fold (sext (aext x)) -> (sext x)
7260   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7261     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
7262 
7263   if (N0.getOpcode() == ISD::TRUNCATE) {
7264     // fold (sext (truncate (load x))) -> (sext (smaller load x))
7265     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
7266     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7267       SDNode *oye = N0.getOperand(0).getNode();
7268       if (NarrowLoad.getNode() != N0.getNode()) {
7269         CombineTo(N0.getNode(), NarrowLoad);
7270         // CombineTo deleted the truncate, if needed, but not what's under it.
7271         AddToWorklist(oye);
7272       }
7273       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7274     }
7275 
7276     // See if the value being truncated is already sign extended.  If so, just
7277     // eliminate the trunc/sext pair.
7278     SDValue Op = N0.getOperand(0);
7279     unsigned OpBits   = Op.getScalarValueSizeInBits();
7280     unsigned MidBits  = N0.getScalarValueSizeInBits();
7281     unsigned DestBits = VT.getScalarSizeInBits();
7282     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
7283 
7284     if (OpBits == DestBits) {
7285       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
7286       // bits, it is already ready.
7287       if (NumSignBits > DestBits-MidBits)
7288         return Op;
7289     } else if (OpBits < DestBits) {
7290       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
7291       // bits, just sext from i32.
7292       if (NumSignBits > OpBits-MidBits)
7293         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
7294     } else {
7295       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
7296       // bits, just truncate to i32.
7297       if (NumSignBits > OpBits-MidBits)
7298         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7299     }
7300 
7301     // fold (sext (truncate x)) -> (sextinreg x).
7302     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
7303                                                  N0.getValueType())) {
7304       if (OpBits < DestBits)
7305         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
7306       else if (OpBits > DestBits)
7307         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
7308       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7309                          DAG.getValueType(N0.getValueType()));
7310     }
7311   }
7312 
7313   // fold (sext (load x)) -> (sext (truncate (sextload x)))
7314   // Only generate vector extloads when 1) they're legal, and 2) they are
7315   // deemed desirable by the target.
7316   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7317       ((!LegalOperations && !VT.isVector() &&
7318         !cast<LoadSDNode>(N0)->isVolatile()) ||
7319        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
7320     bool DoXform = true;
7321     SmallVector<SDNode*, 4> SetCCs;
7322     if (!N0.hasOneUse())
7323       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
7324     if (VT.isVector())
7325       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7326     if (DoXform) {
7327       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7328       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7329                                        LN0->getBasePtr(), N0.getValueType(),
7330                                        LN0->getMemOperand());
7331       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7332                                   N0.getValueType(), ExtLoad);
7333       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
7334       // If the load value is used only by N, replace it via CombineTo N.
7335       bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7336       CombineTo(N, ExtLoad);
7337       if (NoReplaceTrunc)
7338         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7339       else
7340         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7341       return SDValue(N, 0);
7342     }
7343   }
7344 
7345   // fold (sext (load x)) to multiple smaller sextloads.
7346   // Only on illegal but splittable vectors.
7347   if (SDValue ExtLoad = CombineExtLoad(N))
7348     return ExtLoad;
7349 
7350   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
7351   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
7352   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7353       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7354     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7355     EVT MemVT = LN0->getMemoryVT();
7356     if ((!LegalOperations && !LN0->isVolatile()) ||
7357         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
7358       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7359                                        LN0->getBasePtr(), MemVT,
7360                                        LN0->getMemOperand());
7361       CombineTo(N, ExtLoad);
7362       CombineTo(N0.getNode(),
7363                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7364                             N0.getValueType(), ExtLoad),
7365                 ExtLoad.getValue(1));
7366       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7367     }
7368   }
7369 
7370   // fold (sext (and/or/xor (load x), cst)) ->
7371   //      (and/or/xor (sextload x), (sext cst))
7372   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7373        N0.getOpcode() == ISD::XOR) &&
7374       isa<LoadSDNode>(N0.getOperand(0)) &&
7375       N0.getOperand(1).getOpcode() == ISD::Constant &&
7376       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
7377       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7378     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7379     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
7380       bool DoXform = true;
7381       SmallVector<SDNode*, 4> SetCCs;
7382       if (!N0.hasOneUse())
7383         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
7384                                           SetCCs, TLI);
7385       if (DoXform) {
7386         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
7387                                          LN0->getChain(), LN0->getBasePtr(),
7388                                          LN0->getMemoryVT(),
7389                                          LN0->getMemOperand());
7390         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7391         Mask = Mask.sext(VT.getSizeInBits());
7392         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7393                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7394         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7395                                     SDLoc(N0.getOperand(0)),
7396                                     N0.getOperand(0).getValueType(), ExtLoad);
7397         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
7398         bool NoReplaceTruncAnd = !N0.hasOneUse();
7399         bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7400         CombineTo(N, And);
7401         // If N0 has multiple uses, change other uses as well.
7402         if (NoReplaceTruncAnd) {
7403           SDValue TruncAnd =
7404               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
7405           CombineTo(N0.getNode(), TruncAnd);
7406         }
7407         if (NoReplaceTrunc)
7408           DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7409         else
7410           CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7411         return SDValue(N,0); // Return N so it doesn't get rechecked!
7412       }
7413     }
7414   }
7415 
7416   if (N0.getOpcode() == ISD::SETCC) {
7417     SDValue N00 = N0.getOperand(0);
7418     SDValue N01 = N0.getOperand(1);
7419     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7420     EVT N00VT = N0.getOperand(0).getValueType();
7421 
7422     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
7423     // Only do this before legalize for now.
7424     if (VT.isVector() && !LegalOperations &&
7425         TLI.getBooleanContents(N00VT) ==
7426             TargetLowering::ZeroOrNegativeOneBooleanContent) {
7427       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
7428       // of the same size as the compared operands. Only optimize sext(setcc())
7429       // if this is the case.
7430       EVT SVT = getSetCCResultType(N00VT);
7431 
7432       // We know that the # elements of the results is the same as the
7433       // # elements of the compare (and the # elements of the compare result
7434       // for that matter).  Check to see that they are the same size.  If so,
7435       // we know that the element size of the sext'd result matches the
7436       // element size of the compare operands.
7437       if (VT.getSizeInBits() == SVT.getSizeInBits())
7438         return DAG.getSetCC(DL, VT, N00, N01, CC);
7439 
7440       // If the desired elements are smaller or larger than the source
7441       // elements, we can use a matching integer vector type and then
7442       // truncate/sign extend.
7443       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
7444       if (SVT == MatchingVecType) {
7445         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
7446         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
7447       }
7448     }
7449 
7450     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
7451     // Here, T can be 1 or -1, depending on the type of the setcc and
7452     // getBooleanContents().
7453     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
7454 
7455     // To determine the "true" side of the select, we need to know the high bit
7456     // of the value returned by the setcc if it evaluates to true.
7457     // If the type of the setcc is i1, then the true case of the select is just
7458     // sext(i1 1), that is, -1.
7459     // If the type of the setcc is larger (say, i8) then the value of the high
7460     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
7461     // of the appropriate width.
7462     SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
7463                                            : TLI.getConstTrueVal(DAG, VT, DL);
7464     SDValue Zero = DAG.getConstant(0, DL, VT);
7465     if (SDValue SCC =
7466             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
7467       return SCC;
7468 
7469     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
7470       EVT SetCCVT = getSetCCResultType(N00VT);
7471       // Don't do this transform for i1 because there's a select transform
7472       // that would reverse it.
7473       // TODO: We should not do this transform at all without a target hook
7474       // because a sext is likely cheaper than a select?
7475       if (SetCCVT.getScalarSizeInBits() != 1 &&
7476           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
7477         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
7478         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
7479       }
7480     }
7481   }
7482 
7483   // fold (sext x) -> (zext x) if the sign bit is known zero.
7484   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
7485       DAG.SignBitIsZero(N0))
7486     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
7487 
7488   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7489     return NewVSel;
7490 
7491   return SDValue();
7492 }
7493 
7494 // isTruncateOf - If N is a truncate of some other value, return true, record
7495 // the value being truncated in Op and which of Op's bits are zero/one in Known.
7496 // This function computes KnownBits to avoid a duplicated call to
7497 // computeKnownBits in the caller.
7498 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
7499                          KnownBits &Known) {
7500   if (N->getOpcode() == ISD::TRUNCATE) {
7501     Op = N->getOperand(0);
7502     DAG.computeKnownBits(Op, Known);
7503     return true;
7504   }
7505 
7506   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
7507       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
7508     return false;
7509 
7510   SDValue Op0 = N->getOperand(0);
7511   SDValue Op1 = N->getOperand(1);
7512   assert(Op0.getValueType() == Op1.getValueType());
7513 
7514   if (isNullConstant(Op0))
7515     Op = Op1;
7516   else if (isNullConstant(Op1))
7517     Op = Op0;
7518   else
7519     return false;
7520 
7521   DAG.computeKnownBits(Op, Known);
7522 
7523   if (!(Known.Zero | 1).isAllOnesValue())
7524     return false;
7525 
7526   return true;
7527 }
7528 
7529 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
7530   SDValue N0 = N->getOperand(0);
7531   EVT VT = N->getValueType(0);
7532 
7533   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7534                                               LegalOperations))
7535     return SDValue(Res, 0);
7536 
7537   // fold (zext (zext x)) -> (zext x)
7538   // fold (zext (aext x)) -> (zext x)
7539   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7540     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
7541                        N0.getOperand(0));
7542 
7543   // fold (zext (truncate x)) -> (zext x) or
7544   //      (zext (truncate x)) -> (truncate x)
7545   // This is valid when the truncated bits of x are already zero.
7546   // FIXME: We should extend this to work for vectors too.
7547   SDValue Op;
7548   KnownBits Known;
7549   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
7550     APInt TruncatedBits =
7551       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
7552       APInt(Op.getValueSizeInBits(), 0) :
7553       APInt::getBitsSet(Op.getValueSizeInBits(),
7554                         N0.getValueSizeInBits(),
7555                         std::min(Op.getValueSizeInBits(),
7556                                  VT.getSizeInBits()));
7557     if (TruncatedBits.isSubsetOf(Known.Zero))
7558       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7559   }
7560 
7561   // fold (zext (truncate (load x))) -> (zext (smaller load x))
7562   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
7563   if (N0.getOpcode() == ISD::TRUNCATE) {
7564     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7565       SDNode *oye = N0.getOperand(0).getNode();
7566       if (NarrowLoad.getNode() != N0.getNode()) {
7567         CombineTo(N0.getNode(), NarrowLoad);
7568         // CombineTo deleted the truncate, if needed, but not what's under it.
7569         AddToWorklist(oye);
7570       }
7571       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7572     }
7573   }
7574 
7575   // fold (zext (truncate x)) -> (and x, mask)
7576   if (N0.getOpcode() == ISD::TRUNCATE) {
7577     // fold (zext (truncate (load x))) -> (zext (smaller load x))
7578     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
7579     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7580       SDNode *oye = N0.getOperand(0).getNode();
7581       if (NarrowLoad.getNode() != N0.getNode()) {
7582         CombineTo(N0.getNode(), NarrowLoad);
7583         // CombineTo deleted the truncate, if needed, but not what's under it.
7584         AddToWorklist(oye);
7585       }
7586       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7587     }
7588 
7589     EVT SrcVT = N0.getOperand(0).getValueType();
7590     EVT MinVT = N0.getValueType();
7591 
7592     // Try to mask before the extension to avoid having to generate a larger mask,
7593     // possibly over several sub-vectors.
7594     if (SrcVT.bitsLT(VT)) {
7595       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
7596                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
7597         SDValue Op = N0.getOperand(0);
7598         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7599         AddToWorklist(Op.getNode());
7600         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7601       }
7602     }
7603 
7604     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
7605       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
7606       AddToWorklist(Op.getNode());
7607       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7608     }
7609   }
7610 
7611   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
7612   // if either of the casts is not free.
7613   if (N0.getOpcode() == ISD::AND &&
7614       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7615       N0.getOperand(1).getOpcode() == ISD::Constant &&
7616       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7617                            N0.getValueType()) ||
7618        !TLI.isZExtFree(N0.getValueType(), VT))) {
7619     SDValue X = N0.getOperand(0).getOperand(0);
7620     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
7621     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7622     Mask = Mask.zext(VT.getSizeInBits());
7623     SDLoc DL(N);
7624     return DAG.getNode(ISD::AND, DL, VT,
7625                        X, DAG.getConstant(Mask, DL, VT));
7626   }
7627 
7628   // fold (zext (load x)) -> (zext (truncate (zextload x)))
7629   // Only generate vector extloads when 1) they're legal, and 2) they are
7630   // deemed desirable by the target.
7631   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7632       ((!LegalOperations && !VT.isVector() &&
7633         !cast<LoadSDNode>(N0)->isVolatile()) ||
7634        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
7635     bool DoXform = true;
7636     SmallVector<SDNode*, 4> SetCCs;
7637     if (!N0.hasOneUse())
7638       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
7639     if (VT.isVector())
7640       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7641     if (DoXform) {
7642       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7643       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7644                                        LN0->getChain(),
7645                                        LN0->getBasePtr(), N0.getValueType(),
7646                                        LN0->getMemOperand());
7647 
7648       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7649                                   N0.getValueType(), ExtLoad);
7650       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
7651       // If the load value is used only by N, replace it via CombineTo N.
7652       bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7653       CombineTo(N, ExtLoad);
7654       if (NoReplaceTrunc)
7655         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7656       else
7657         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7658       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7659     }
7660   }
7661 
7662   // fold (zext (load x)) to multiple smaller zextloads.
7663   // Only on illegal but splittable vectors.
7664   if (SDValue ExtLoad = CombineExtLoad(N))
7665     return ExtLoad;
7666 
7667   // fold (zext (and/or/xor (load x), cst)) ->
7668   //      (and/or/xor (zextload x), (zext cst))
7669   // Unless (and (load x) cst) will match as a zextload already and has
7670   // additional users.
7671   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7672        N0.getOpcode() == ISD::XOR) &&
7673       isa<LoadSDNode>(N0.getOperand(0)) &&
7674       N0.getOperand(1).getOpcode() == ISD::Constant &&
7675       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
7676       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7677     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7678     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
7679       bool DoXform = true;
7680       SmallVector<SDNode*, 4> SetCCs;
7681       if (!N0.hasOneUse()) {
7682         if (N0.getOpcode() == ISD::AND) {
7683           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
7684           auto NarrowLoad = false;
7685           EVT LoadResultTy = AndC->getValueType(0);
7686           EVT ExtVT, LoadedVT;
7687           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
7688                                NarrowLoad))
7689             DoXform = false;
7690         }
7691         if (DoXform)
7692           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
7693                                             ISD::ZERO_EXTEND, SetCCs, TLI);
7694       }
7695       if (DoXform) {
7696         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
7697                                          LN0->getChain(), LN0->getBasePtr(),
7698                                          LN0->getMemoryVT(),
7699                                          LN0->getMemOperand());
7700         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7701         Mask = Mask.zext(VT.getSizeInBits());
7702         SDLoc DL(N);
7703         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7704                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7705         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7706                                     SDLoc(N0.getOperand(0)),
7707                                     N0.getOperand(0).getValueType(), ExtLoad);
7708         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
7709         bool NoReplaceTruncAnd = !N0.hasOneUse();
7710         bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7711         CombineTo(N, And);
7712         // If N0 has multiple uses, change other uses as well.
7713         if (NoReplaceTruncAnd) {
7714           SDValue TruncAnd =
7715               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
7716           CombineTo(N0.getNode(), TruncAnd);
7717         }
7718         if (NoReplaceTrunc)
7719           DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7720         else
7721           CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7722         return SDValue(N,0); // Return N so it doesn't get rechecked!
7723       }
7724     }
7725   }
7726 
7727   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
7728   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
7729   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7730       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7731     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7732     EVT MemVT = LN0->getMemoryVT();
7733     if ((!LegalOperations && !LN0->isVolatile()) ||
7734         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
7735       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7736                                        LN0->getChain(),
7737                                        LN0->getBasePtr(), MemVT,
7738                                        LN0->getMemOperand());
7739       CombineTo(N, ExtLoad);
7740       CombineTo(N0.getNode(),
7741                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
7742                             ExtLoad),
7743                 ExtLoad.getValue(1));
7744       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7745     }
7746   }
7747 
7748   if (N0.getOpcode() == ISD::SETCC) {
7749     // Only do this before legalize for now.
7750     if (!LegalOperations && VT.isVector() &&
7751         N0.getValueType().getVectorElementType() == MVT::i1) {
7752       EVT N00VT = N0.getOperand(0).getValueType();
7753       if (getSetCCResultType(N00VT) == N0.getValueType())
7754         return SDValue();
7755 
7756       // We know that the # elements of the results is the same as the #
7757       // elements of the compare (and the # elements of the compare result for
7758       // that matter). Check to see that they are the same size. If so, we know
7759       // that the element size of the sext'd result matches the element size of
7760       // the compare operands.
7761       SDLoc DL(N);
7762       SDValue VecOnes = DAG.getConstant(1, DL, VT);
7763       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
7764         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
7765         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
7766                                      N0.getOperand(1), N0.getOperand(2));
7767         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
7768       }
7769 
7770       // If the desired elements are smaller or larger than the source
7771       // elements we can use a matching integer vector type and then
7772       // truncate/sign extend.
7773       EVT MatchingElementType = EVT::getIntegerVT(
7774           *DAG.getContext(), N00VT.getScalarSizeInBits());
7775       EVT MatchingVectorType = EVT::getVectorVT(
7776           *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
7777       SDValue VsetCC =
7778           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
7779                       N0.getOperand(1), N0.getOperand(2));
7780       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
7781                          VecOnes);
7782     }
7783 
7784     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7785     SDLoc DL(N);
7786     if (SDValue SCC = SimplifySelectCC(
7787             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7788             DAG.getConstant(0, DL, VT),
7789             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7790       return SCC;
7791   }
7792 
7793   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
7794   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
7795       isa<ConstantSDNode>(N0.getOperand(1)) &&
7796       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
7797       N0.hasOneUse()) {
7798     SDValue ShAmt = N0.getOperand(1);
7799     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
7800     if (N0.getOpcode() == ISD::SHL) {
7801       SDValue InnerZExt = N0.getOperand(0);
7802       // If the original shl may be shifting out bits, do not perform this
7803       // transformation.
7804       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
7805         InnerZExt.getOperand(0).getValueSizeInBits();
7806       if (ShAmtVal > KnownZeroBits)
7807         return SDValue();
7808     }
7809 
7810     SDLoc DL(N);
7811 
7812     // Ensure that the shift amount is wide enough for the shifted value.
7813     if (VT.getSizeInBits() >= 256)
7814       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
7815 
7816     return DAG.getNode(N0.getOpcode(), DL, VT,
7817                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
7818                        ShAmt);
7819   }
7820 
7821   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7822     return NewVSel;
7823 
7824   return SDValue();
7825 }
7826 
/// Combine an ANY_EXTEND node.  ANY_EXTEND leaves the high bits of the
/// result undefined, so this visitor can collapse extend/truncate pairs,
/// turn loads into extending loads, and rewrite aext(setcc) into either a
/// vector setcc or a select_cc, without worrying about preserving any
/// particular extension of the high bits.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Fold an any-extend of a constant (or build_vector of constants) node.
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  // The inner extend's opcode is preserved: its guarantee about the high
  // bits subsumes anything an outer any-extend would promise.
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  // Since the high bits are undefined anyway, the truncate can be skipped
  // and x extended (or truncated) straight to VT.
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    // Zero-extend the mask: the and must still clear the same bits in VT.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, only transform if all uses can be extended too.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc)
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      else
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    // Keep the existing extension type: the narrower load already fixes the
    // semantics of the loaded bits.
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                        N0.getOperand(1),
                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}
7972 
7973 SDValue DAGCombiner::visitAssertZext(SDNode *N) {
7974   SDValue N0 = N->getOperand(0);
7975   SDValue N1 = N->getOperand(1);
7976   EVT EVT = cast<VTSDNode>(N1)->getVT();
7977 
7978   // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
7979   if (N0.getOpcode() == ISD::AssertZext &&
7980       EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
7981     return N0;
7982 
7983   return SDValue();
7984 }
7985 
7986 /// If the result of a wider load is shifted to right of N  bits and then
7987 /// truncated to a narrower type and where N is a multiple of number of bits of
7988 /// the narrower type, transform it to a narrower load from address + N / num of
7989 /// bits of new type. If the result is to be extended, also fold the extension
7990 /// to form a extending load.
7991 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
7992   unsigned Opc = N->getOpcode();
7993 
7994   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
7995   SDValue N0 = N->getOperand(0);
7996   EVT VT = N->getValueType(0);
7997   EVT ExtVT = VT;
7998 
7999   // This transformation isn't valid for vector loads.
8000   if (VT.isVector())
8001     return SDValue();
8002 
8003   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
8004   // extended to VT.
8005   if (Opc == ISD::SIGN_EXTEND_INREG) {
8006     ExtType = ISD::SEXTLOAD;
8007     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
8008   } else if (Opc == ISD::SRL) {
8009     // Another special-case: SRL is basically zero-extending a narrower value.
8010     ExtType = ISD::ZEXTLOAD;
8011     N0 = SDValue(N, 0);
8012     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8013     if (!N01) return SDValue();
8014     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
8015                               VT.getSizeInBits() - N01->getZExtValue());
8016   }
8017   if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
8018     return SDValue();
8019 
8020   unsigned EVTBits = ExtVT.getSizeInBits();
8021 
8022   // Do not generate loads of non-round integer types since these can
8023   // be expensive (and would be wrong if the type is not byte sized).
8024   if (!ExtVT.isRound())
8025     return SDValue();
8026 
8027   unsigned ShAmt = 0;
8028   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
8029     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
8030       ShAmt = N01->getZExtValue();
8031       // Is the shift amount a multiple of size of VT?
8032       if ((ShAmt & (EVTBits-1)) == 0) {
8033         N0 = N0.getOperand(0);
8034         // Is the load width a multiple of size of VT?
8035         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
8036           return SDValue();
8037       }
8038 
8039       // At this point, we must have a load or else we can't do the transform.
8040       if (!isa<LoadSDNode>(N0)) return SDValue();
8041 
8042       // Because a SRL must be assumed to *need* to zero-extend the high bits
8043       // (as opposed to anyext the high bits), we can't combine the zextload
8044       // lowering of SRL and an sextload.
8045       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
8046         return SDValue();
8047 
8048       // If the shift amount is larger than the input type then we're not
8049       // accessing any of the loaded bytes.  If the load was a zextload/extload
8050       // then the result of the shift+trunc is zero/undef (handled elsewhere).
8051       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
8052         return SDValue();
8053     }
8054   }
8055 
8056   // If the load is shifted left (and the result isn't shifted back right),
8057   // we can fold the truncate through the shift.
8058   unsigned ShLeftAmt = 0;
8059   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
8060       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
8061     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
8062       ShLeftAmt = N01->getZExtValue();
8063       N0 = N0.getOperand(0);
8064     }
8065   }
8066 
8067   // If we haven't found a load, we can't narrow it.  Don't transform one with
8068   // multiple uses, this would require adding a new load.
8069   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
8070     return SDValue();
8071 
8072   // Don't change the width of a volatile load.
8073   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8074   if (LN0->isVolatile())
8075     return SDValue();
8076 
8077   // Verify that we are actually reducing a load width here.
8078   if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
8079     return SDValue();
8080 
8081   // For the transform to be legal, the load must produce only two values
8082   // (the value loaded and the chain).  Don't transform a pre-increment
8083   // load, for example, which produces an extra value.  Otherwise the
8084   // transformation is not equivalent, and the downstream logic to replace
8085   // uses gets things wrong.
8086   if (LN0->getNumValues() > 2)
8087     return SDValue();
8088 
8089   // If the load that we're shrinking is an extload and we're not just
8090   // discarding the extension we can't simply shrink the load. Bail.
8091   // TODO: It would be possible to merge the extensions in some cases.
8092   if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
8093       LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
8094     return SDValue();
8095 
8096   if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
8097     return SDValue();
8098 
8099   EVT PtrType = N0.getOperand(1).getValueType();
8100 
8101   if (PtrType == MVT::Untyped || PtrType.isExtended())
8102     // It's not possible to generate a constant of extended or untyped type.
8103     return SDValue();
8104 
8105   // For big endian targets, we need to adjust the offset to the pointer to
8106   // load the correct bytes.
8107   if (DAG.getDataLayout().isBigEndian()) {
8108     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
8109     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
8110     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
8111   }
8112 
8113   uint64_t PtrOff = ShAmt / 8;
8114   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
8115   SDLoc DL(LN0);
8116   // The original load itself didn't wrap, so an offset within it doesn't.
8117   SDNodeFlags Flags;
8118   Flags.setNoUnsignedWrap(true);
8119   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
8120                                PtrType, LN0->getBasePtr(),
8121                                DAG.getConstant(PtrOff, DL, PtrType),
8122                                Flags);
8123   AddToWorklist(NewPtr.getNode());
8124 
8125   SDValue Load;
8126   if (ExtType == ISD::NON_EXTLOAD)
8127     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
8128                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8129                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8130   else
8131     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
8132                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
8133                           NewAlign, LN0->getMemOperand()->getFlags(),
8134                           LN0->getAAInfo());
8135 
8136   // Replace the old load's chain with the new load's chain.
8137   WorklistRemover DeadNodes(*this);
8138   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8139 
8140   // Shift the result left, if we've swallowed a left shift.
8141   SDValue Result = Load;
8142   if (ShLeftAmt != 0) {
8143     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
8144     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
8145       ShImmTy = VT;
8146     // If the shift amount is as large as the result size (but, presumably,
8147     // no larger than the source) then the useful bits of the result are
8148     // zero; we can't simply return the shortened shift, because the result
8149     // of that operation is undefined.
8150     SDLoc DL(N0);
8151     if (ShLeftAmt >= VT.getSizeInBits())
8152       Result = DAG.getConstant(0, DL, VT);
8153     else
8154       Result = DAG.getNode(ISD::SHL, DL, VT,
8155                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
8156   }
8157 
8158   // Return the new loaded value.
8159   return Result;
8160 }
8161 
8162 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
8163   SDValue N0 = N->getOperand(0);
8164   SDValue N1 = N->getOperand(1);
8165   EVT VT = N->getValueType(0);
8166   EVT EVT = cast<VTSDNode>(N1)->getVT();
8167   unsigned VTBits = VT.getScalarSizeInBits();
8168   unsigned EVTBits = EVT.getScalarSizeInBits();
8169 
8170   if (N0.isUndef())
8171     return DAG.getUNDEF(VT);
8172 
8173   // fold (sext_in_reg c1) -> c1
8174   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8175     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
8176 
8177   // If the input is already sign extended, just drop the extension.
8178   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
8179     return N0;
8180 
8181   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
8182   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
8183       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
8184     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8185                        N0.getOperand(0), N1);
8186 
8187   // fold (sext_in_reg (sext x)) -> (sext x)
8188   // fold (sext_in_reg (aext x)) -> (sext x)
8189   // if x is small enough.
8190   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
8191     SDValue N00 = N0.getOperand(0);
8192     if (N00.getScalarValueSizeInBits() <= EVTBits &&
8193         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8194       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8195   }
8196 
8197   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
8198   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
8199        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
8200        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
8201       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
8202     if (!LegalOperations ||
8203         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
8204       return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
8205   }
8206 
8207   // fold (sext_in_reg (zext x)) -> (sext x)
8208   // iff we are extending the source sign bit.
8209   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
8210     SDValue N00 = N0.getOperand(0);
8211     if (N00.getScalarValueSizeInBits() == EVTBits &&
8212         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8213       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8214   }
8215 
8216   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
8217   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
8218     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
8219 
8220   // fold operands of sext_in_reg based on knowledge that the top bits are not
8221   // demanded.
8222   if (SimplifyDemandedBits(SDValue(N, 0)))
8223     return SDValue(N, 0);
8224 
8225   // fold (sext_in_reg (load x)) -> (smaller sextload x)
8226   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
8227   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8228     return NarrowLoad;
8229 
8230   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
8231   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
8232   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
8233   if (N0.getOpcode() == ISD::SRL) {
8234     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
8235       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
8236         // We can turn this into an SRA iff the input to the SRL is already sign
8237         // extended enough.
8238         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
8239         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
8240           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
8241                              N0.getOperand(0), N0.getOperand(1));
8242       }
8243   }
8244 
8245   // fold (sext_inreg (extload x)) -> (sextload x)
8246   if (ISD::isEXTLoad(N0.getNode()) &&
8247       ISD::isUNINDEXEDLoad(N0.getNode()) &&
8248       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8249       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
8250        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8251     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8252     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8253                                      LN0->getChain(),
8254                                      LN0->getBasePtr(), EVT,
8255                                      LN0->getMemOperand());
8256     CombineTo(N, ExtLoad);
8257     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8258     AddToWorklist(ExtLoad.getNode());
8259     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8260   }
8261   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
8262   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
8263       N0.hasOneUse() &&
8264       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8265       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
8266        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8267     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8268     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8269                                      LN0->getChain(),
8270                                      LN0->getBasePtr(), EVT,
8271                                      LN0->getMemOperand());
8272     CombineTo(N, ExtLoad);
8273     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8274     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8275   }
8276 
8277   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
8278   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
8279     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
8280                                            N0.getOperand(1), false))
8281       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8282                          BSwap, N1);
8283   }
8284 
8285   return SDValue();
8286 }
8287 
8288 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
8289   SDValue N0 = N->getOperand(0);
8290   EVT VT = N->getValueType(0);
8291 
8292   if (N0.isUndef())
8293     return DAG.getUNDEF(VT);
8294 
8295   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8296                                               LegalOperations))
8297     return SDValue(Res, 0);
8298 
8299   return SDValue();
8300 }
8301 
8302 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
8303   SDValue N0 = N->getOperand(0);
8304   EVT VT = N->getValueType(0);
8305 
8306   if (N0.isUndef())
8307     return DAG.getUNDEF(VT);
8308 
8309   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8310                                               LegalOperations))
8311     return SDValue(Res, 0);
8312 
8313   return SDValue();
8314 }
8315 
/// Combine a TRUNCATE node: remove no-op or composable truncates, push
/// truncation through extends, selects, shifts, vector operations, loads and
/// carrying adds, and simplify the operand via demanded-bits analysis.
/// Returns the replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, than we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {

    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Reinterpret the source vector with SizeRatio-times more (narrower)
    // elements; total bit width must be preserved.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      // On big-endian targets the low bits of the wide element live in the
      // last of the narrow sub-elements, hence the +(SizeRatio-1) offset.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known;
    DAG.computeKnownBits(Amt, Known);
    unsigned Size = VT.getScalarSizeInBits();
    // Only safe when the shift amount is provably small enough that the
    // shifted-in bits all land inside the narrow type (max possible amount
    // value, bounded by the known leading zeros, stays below Size).
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {

    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset'th operand; the rest are the high parts
      // discarded by the truncate.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Redirect chain users of the old load to the new load's chain.
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    // Scan operands, remembering the single non-undef one (if any) and
    // the narrowed vector type for each operand position.
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT SrcVT = VecSrc.getValueType();
    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
      SDLoc SL(N);

      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      // The "low" element is element 0 on little-endian and the last element
      // on big-endian targets.
      unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
                         VecSrc, DAG.getConstant(Idx, SL, IdxVT));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
8568 
8569 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
8570   SDValue Elt = N->getOperand(i);
8571   if (Elt.getOpcode() != ISD::MERGE_VALUES)
8572     return Elt.getNode();
8573   return Elt.getOperand(Elt.getResNo()).getNode();
8574 }
8575 
8576 /// build_pair (load, load) -> load
8577 /// if load locations are consecutive.
8578 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
8579   assert(N->getOpcode() == ISD::BUILD_PAIR);
8580 
8581   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
8582   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
8583   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
8584       LD1->getAddressSpace() != LD2->getAddressSpace())
8585     return SDValue();
8586   EVT LD1VT = LD1->getValueType(0);
8587   unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
8588   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
8589       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
8590     unsigned Align = LD1->getAlignment();
8591     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
8592         VT.getTypeForEVT(*DAG.getContext()));
8593 
8594     if (NewAlign <= Align &&
8595         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
8596       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
8597                          LD1->getPointerInfo(), Align);
8598   }
8599 
8600   return SDValue();
8601 }
8602 
8603 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
8604   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
8605   // and Lo parts; on big-endian machines it doesn't.
8606   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
8607 }
8608 
/// Fold a bitcast-to-FP of integer sign-bit logic into the corresponding FP
/// operation:
///   bitcast (and (bitcast fp X), ~signmask) -> fabs X
///   bitcast (xor (bitcast fp X), signmask)  -> fneg X
/// Returns the FP node, or an empty SDValue if the pattern does not match.
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  // If this is not a bitcast to an FP type or if the target doesn't have
  // IEEE754-compliant FP logic, we're done.
  EVT VT = N->getValueType(0);
  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
    return SDValue();

  // TODO: Use splat values for the constant-checking below and remove this
  // restriction.
  SDValue N0 = N->getOperand(0);
  EVT SourceVT = N0.getValueType();
  if (SourceVT.isVector())
    return SDValue();

  // Map the integer logic opcode to the FP opcode and the exact constant
  // mask that makes the transformation valid.
  unsigned FPOpcode;
  APInt SignMask;
  switch (N0.getOpcode()) {
  case ISD::AND:
    // Clearing only the sign bit is fabs.
    FPOpcode = ISD::FABS;
    SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
    break;
  case ISD::XOR:
    // Flipping only the sign bit is fneg.
    FPOpcode = ISD::FNEG;
    SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
    break;
  // TODO: ISD::OR --> ISD::FNABS?
  default:
    return SDValue();
  }

  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
  SDValue LogicOp0 = N0.getOperand(0);
  ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
      LogicOp0.getOpcode() == ISD::BITCAST &&
      LogicOp0->getOperand(0).getValueType() == VT)
    return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));

  return SDValue();
}
8651 
8652 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
8653   SDValue N0 = N->getOperand(0);
8654   EVT VT = N->getValueType(0);
8655 
8656   if (N0.isUndef())
8657     return DAG.getUNDEF(VT);
8658 
8659   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
8660   // Only do this before legalize, since afterward the target may be depending
8661   // on the bitconvert.
8662   // First check to see if this is all constant.
8663   if (!LegalTypes &&
8664       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
8665       VT.isVector()) {
8666     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
8667 
8668     EVT DestEltVT = N->getValueType(0).getVectorElementType();
8669     assert(!DestEltVT.isVector() &&
8670            "Element type of vector ValueType must not be vector!");
8671     if (isSimple)
8672       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
8673   }
8674 
8675   // If the input is a constant, let getNode fold it.
8676   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
8677     // If we can't allow illegal operations, we need to check that this is just
8678     // a fp -> int or int -> conversion and that the resulting operation will
8679     // be legal.
8680     if (!LegalOperations ||
8681         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
8682          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
8683         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
8684          TLI.isOperationLegal(ISD::Constant, VT)))
8685       return DAG.getBitcast(VT, N0);
8686   }
8687 
8688   // (conv (conv x, t1), t2) -> (conv x, t2)
8689   if (N0.getOpcode() == ISD::BITCAST)
8690     return DAG.getBitcast(VT, N0.getOperand(0));
8691 
8692   // fold (conv (load x)) -> (load (conv*)x)
8693   // If the resultant load doesn't need a higher alignment than the original!
8694   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
8695       // Do not change the width of a volatile load.
8696       !cast<LoadSDNode>(N0)->isVolatile() &&
8697       // Do not remove the cast if the types differ in endian layout.
8698       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
8699           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
8700       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
8701       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
8702     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8703     unsigned OrigAlign = LN0->getAlignment();
8704 
8705     bool Fast = false;
8706     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8707                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
8708         Fast) {
8709       SDValue Load =
8710           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
8711                       LN0->getPointerInfo(), OrigAlign,
8712                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8713       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8714       return Load;
8715     }
8716   }
8717 
8718   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
8719     return V;
8720 
8721   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
8722   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
8723   //
8724   // For ppc_fp128:
8725   // fold (bitcast (fneg x)) ->
8726   //     flipbit = signbit
8727   //     (xor (bitcast x) (build_pair flipbit, flipbit))
8728   //
8729   // fold (bitcast (fabs x)) ->
8730   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
8731   //     (xor (bitcast x) (build_pair flipbit, flipbit))
8732   // This often reduces constant pool loads.
8733   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
8734        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
8735       N0.getNode()->hasOneUse() && VT.isInteger() &&
8736       !VT.isVector() && !N0.getValueType().isVector()) {
8737     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
8738     AddToWorklist(NewConv.getNode());
8739 
8740     SDLoc DL(N);
8741     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
8742       assert(VT.getSizeInBits() == 128);
8743       SDValue SignBit = DAG.getConstant(
8744           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
8745       SDValue FlipBit;
8746       if (N0.getOpcode() == ISD::FNEG) {
8747         FlipBit = SignBit;
8748         AddToWorklist(FlipBit.getNode());
8749       } else {
8750         assert(N0.getOpcode() == ISD::FABS);
8751         SDValue Hi =
8752             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
8753                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8754                                               SDLoc(NewConv)));
8755         AddToWorklist(Hi.getNode());
8756         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
8757         AddToWorklist(FlipBit.getNode());
8758       }
8759       SDValue FlipBits =
8760           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8761       AddToWorklist(FlipBits.getNode());
8762       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
8763     }
8764     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
8765     if (N0.getOpcode() == ISD::FNEG)
8766       return DAG.getNode(ISD::XOR, DL, VT,
8767                          NewConv, DAG.getConstant(SignBit, DL, VT));
8768     assert(N0.getOpcode() == ISD::FABS);
8769     return DAG.getNode(ISD::AND, DL, VT,
8770                        NewConv, DAG.getConstant(~SignBit, DL, VT));
8771   }
8772 
8773   // fold (bitconvert (fcopysign cst, x)) ->
8774   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
8775   // Note that we don't handle (copysign x, cst) because this can always be
8776   // folded to an fneg or fabs.
8777   //
8778   // For ppc_fp128:
8779   // fold (bitcast (fcopysign cst, x)) ->
8780   //     flipbit = (and (extract_element
8781   //                     (xor (bitcast cst), (bitcast x)), 0),
8782   //                    signbit)
8783   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
8784   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
8785       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
8786       VT.isInteger() && !VT.isVector()) {
8787     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
8788     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
8789     if (isTypeLegal(IntXVT)) {
8790       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
8791       AddToWorklist(X.getNode());
8792 
8793       // If X has a different width than the result/lhs, sext it or truncate it.
8794       unsigned VTWidth = VT.getSizeInBits();
8795       if (OrigXWidth < VTWidth) {
8796         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
8797         AddToWorklist(X.getNode());
8798       } else if (OrigXWidth > VTWidth) {
8799         // To get the sign bit in the right place, we have to shift it right
8800         // before truncating.
8801         SDLoc DL(X);
8802         X = DAG.getNode(ISD::SRL, DL,
8803                         X.getValueType(), X,
8804                         DAG.getConstant(OrigXWidth-VTWidth, DL,
8805                                         X.getValueType()));
8806         AddToWorklist(X.getNode());
8807         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
8808         AddToWorklist(X.getNode());
8809       }
8810 
8811       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
8812         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
8813         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
8814         AddToWorklist(Cst.getNode());
8815         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
8816         AddToWorklist(X.getNode());
8817         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
8818         AddToWorklist(XorResult.getNode());
8819         SDValue XorResult64 = DAG.getNode(
8820             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
8821             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8822                                   SDLoc(XorResult)));
8823         AddToWorklist(XorResult64.getNode());
8824         SDValue FlipBit =
8825             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
8826                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
8827         AddToWorklist(FlipBit.getNode());
8828         SDValue FlipBits =
8829             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8830         AddToWorklist(FlipBits.getNode());
8831         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
8832       }
8833       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
8834       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
8835                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
8836       AddToWorklist(X.getNode());
8837 
8838       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
8839       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
8840                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
8841       AddToWorklist(Cst.getNode());
8842 
8843       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
8844     }
8845   }
8846 
8847   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
8848   if (N0.getOpcode() == ISD::BUILD_PAIR)
8849     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
8850       return CombineLD;
8851 
8852   // Remove double bitcasts from shuffles - this is often a legacy of
8853   // XformToShuffleWithZero being used to combine bitmaskings (of
8854   // float vectors bitcast to integer vectors) into shuffles.
8855   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
8856   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
8857       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
8858       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
8859       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
8860     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
8861 
8862     // If operands are a bitcast, peek through if it casts the original VT.
8863     // If operands are a constant, just bitcast back to original VT.
8864     auto PeekThroughBitcast = [&](SDValue Op) {
8865       if (Op.getOpcode() == ISD::BITCAST &&
8866           Op.getOperand(0).getValueType() == VT)
8867         return SDValue(Op.getOperand(0));
8868       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
8869           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
8870         return DAG.getBitcast(VT, Op);
8871       return SDValue();
8872     };
8873 
8874     // FIXME: If either input vector is bitcast, try to convert the shuffle to
8875     // the result type of this bitcast. This would eliminate at least one
8876     // bitcast. See the transform in InstCombine.
8877     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
8878     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
8879     if (!(SV0 && SV1))
8880       return SDValue();
8881 
8882     int MaskScale =
8883         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
8884     SmallVector<int, 8> NewMask;
8885     for (int M : SVN->getMask())
8886       for (int i = 0; i != MaskScale; ++i)
8887         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
8888 
8889     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
8890     if (!LegalMask) {
8891       std::swap(SV0, SV1);
8892       ShuffleVectorSDNode::commuteMask(NewMask);
8893       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
8894     }
8895 
8896     if (LegalMask)
8897       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
8898   }
8899 
8900   return SDValue();
8901 }
8902 
8903 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
8904   EVT VT = N->getValueType(0);
8905   return CombineConsecutiveLoads(N, VT);
8906 }
8907 
8908 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
8909 /// operands. DstEltVT indicates the destination element value type.
8910 SDValue DAGCombiner::
8911 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
8912   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
8913 
8914   // If this is already the right type, we're done.
8915   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
8916 
8917   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
8918   unsigned DstBitSize = DstEltVT.getSizeInBits();
8919 
8920   // If this is a conversion of N elements of one type to N elements of another
8921   // type, convert each element.  This handles FP<->INT cases.
8922   if (SrcBitSize == DstBitSize) {
8923     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8924                               BV->getValueType(0).getVectorNumElements());
8925 
8926     // Due to the FP element handling below calling this routine recursively,
8927     // we can end up with a scalar-to-vector node here.
8928     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
8929       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
8930                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
8931 
8932     SmallVector<SDValue, 8> Ops;
8933     for (SDValue Op : BV->op_values()) {
8934       // If the vector element type is not legal, the BUILD_VECTOR operands
8935       // are promoted and implicitly truncated.  Make that explicit here.
8936       if (Op.getValueType() != SrcEltVT)
8937         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
8938       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
8939       AddToWorklist(Ops.back().getNode());
8940     }
8941     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
8942   }
8943 
8944   // Otherwise, we're growing or shrinking the elements.  To avoid having to
8945   // handle annoying details of growing/shrinking FP values, we convert them to
8946   // int first.
8947   if (SrcEltVT.isFloatingPoint()) {
8948     // Convert the input float vector to a int vector where the elements are the
8949     // same sizes.
8950     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
8951     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
8952     SrcEltVT = IntVT;
8953   }
8954 
8955   // Now we know the input is an integer vector.  If the output is a FP type,
8956   // convert to integer first, then to FP of the right size.
8957   if (DstEltVT.isFloatingPoint()) {
8958     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
8959     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
8960 
8961     // Next, convert to FP elements of the same size.
8962     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
8963   }
8964 
8965   SDLoc DL(BV);
8966 
8967   // Okay, we know the src/dst types are both integers of differing types.
8968   // Handling growing first.
8969   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
8970   if (SrcBitSize < DstBitSize) {
8971     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
8972 
8973     SmallVector<SDValue, 8> Ops;
8974     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
8975          i += NumInputsPerOutput) {
8976       bool isLE = DAG.getDataLayout().isLittleEndian();
8977       APInt NewBits = APInt(DstBitSize, 0);
8978       bool EltIsUndef = true;
8979       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
8980         // Shift the previously computed bits over.
8981         NewBits <<= SrcBitSize;
8982         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
8983         if (Op.isUndef()) continue;
8984         EltIsUndef = false;
8985 
8986         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
8987                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
8988       }
8989 
8990       if (EltIsUndef)
8991         Ops.push_back(DAG.getUNDEF(DstEltVT));
8992       else
8993         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
8994     }
8995 
8996     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
8997     return DAG.getBuildVector(VT, DL, Ops);
8998   }
8999 
9000   // Finally, this must be the case where we are shrinking elements: each input
9001   // turns into multiple outputs.
9002   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
9003   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
9004                             NumOutputsPerInput*BV->getNumOperands());
9005   SmallVector<SDValue, 8> Ops;
9006 
9007   for (const SDValue &Op : BV->op_values()) {
9008     if (Op.isUndef()) {
9009       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
9010       continue;
9011     }
9012 
9013     APInt OpVal = cast<ConstantSDNode>(Op)->
9014                   getAPIntValue().zextOrTrunc(SrcBitSize);
9015 
9016     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
9017       APInt ThisVal = OpVal.trunc(DstBitSize);
9018       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
9019       OpVal.lshrInPlace(DstBitSize);
9020     }
9021 
9022     // For big endian targets, swap the order of the pieces of each element.
9023     if (DAG.getDataLayout().isBigEndian())
9024       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
9025   }
9026 
9027   return DAG.getBuildVector(VT, DL, Ops);
9028 }
9029 
9030 static bool isContractable(SDNode *N) {
9031   SDNodeFlags F = N->getFlags();
9032   return F.hasAllowContract() || F.hasUnsafeAlgebra();
9033 }
9034 
/// Try to perform FMA combining on a given FADD node.
///
/// Attempts to fold (fadd (fmul x, y), z) and related patterns into a fused
/// multiply-add (FMA or FMAD), possibly looking through FP_EXTEND and
/// already-formed fused nodes when the target permits. Returns the replacement
/// node, or an empty SDValue if no fold applies. The folds are tried in a
/// deliberate order: plain fmul operands first, then fpext-wrapped ones, then
/// the aggressive nested-FMA patterns.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Fusion is permitted for every contraction candidate when the target asked
  // for it globally (fp-contract=fast or unsafe-fp-math), or when FMAD is
  // available (FMAD keeps the intermediate rounding, so forming it does not
  // change the numeric result).
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              Options.UnsafeFPMath || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Some targets prefer to form FMAs later, in the machine combiner; in that
  // case stay out of their way here.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool LookThroughFPExt = TLI.isFPExtFree(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0);
  }

  // Look through FP_EXTEND nodes to do more combining.
  if (LookThroughFPExt) {
    // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (isContractableFMUL(N00))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(1)), N1);
    }

    // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (isContractableFMUL(N10))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(1)), N0);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1));
    }

    // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0));
    }

    if (LookThroughFPExt) {
      // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
      //   -> (fma x, y, (fma (fpext u), (fpext v), z))
      auto FoldFAddFMAFPExtFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == PreferredFusedOpcode) {
        SDValue N02 = N0.getOperand(2);
        if (N02.getOpcode() == ISD::FP_EXTEND) {
          SDValue N020 = N02.getOperand(0);
          if (isContractableFMUL(N020))
            return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                        N020.getOperand(0), N020.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
      //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      auto FoldFAddFPExtFMAFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == PreferredFusedOpcode) {
          SDValue N002 = N00.getOperand(2);
          if (isContractableFMUL(N002))
            return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                        N002.getOperand(0), N002.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
      //   -> (fma y, z, (fma (fpext u), (fpext v), x))
      if (N1.getOpcode() == PreferredFusedOpcode) {
        SDValue N12 = N1.getOperand(2);
        if (N12.getOpcode() == ISD::FP_EXTEND) {
          SDValue N120 = N12.getOperand(0);
          if (isContractableFMUL(N120))
            return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                        N120.getOperand(0), N120.getOperand(1),
                                        N0);
        }
      }

      // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
      //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == PreferredFusedOpcode) {
          SDValue N102 = N10.getOperand(2);
          if (isContractableFMUL(N102))
            return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                        N102.getOperand(0), N102.getOperand(1),
                                        N0);
        }
      }
    }
  }

  // No fold applied.
  return SDValue();
}
9237 
9238 /// Try to perform FMA combining on a given FSUB node.
9239 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
9240   SDValue N0 = N->getOperand(0);
9241   SDValue N1 = N->getOperand(1);
9242   EVT VT = N->getValueType(0);
9243   SDLoc SL(N);
9244 
9245   const TargetOptions &Options = DAG.getTarget().Options;
9246   // Floating-point multiply-add with intermediate rounding.
9247   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9248 
9249   // Floating-point multiply-add without intermediate rounding.
9250   bool HasFMA =
9251       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9252       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9253 
9254   // No valid opcode, do not combine.
9255   if (!HasFMAD && !HasFMA)
9256     return SDValue();
9257 
9258   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
9259                               Options.UnsafeFPMath || HasFMAD);
9260   // If the subtraction is not contractable, do not combine.
9261   if (!AllowFusionGlobally && !isContractable(N))
9262     return SDValue();
9263 
9264   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
9265   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
9266     return SDValue();
9267 
9268   // Always prefer FMAD to FMA for precision.
9269   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9270   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9271   bool LookThroughFPExt = TLI.isFPExtFree(VT);
9272 
9273   // Is the node an FMUL and contractable either due to global flags or
9274   // SDNodeFlags.
9275   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
9276     if (N.getOpcode() != ISD::FMUL)
9277       return false;
9278     return AllowFusionGlobally || isContractable(N.getNode());
9279   };
9280 
9281   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
9282   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
9283     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9284                        N0.getOperand(0), N0.getOperand(1),
9285                        DAG.getNode(ISD::FNEG, SL, VT, N1));
9286   }
9287 
9288   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
9289   // Note: Commutes FSUB operands.
9290   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
9291     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9292                        DAG.getNode(ISD::FNEG, SL, VT,
9293                                    N1.getOperand(0)),
9294                        N1.getOperand(1), N0);
9295 
9296   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
9297   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
9298       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
9299     SDValue N00 = N0.getOperand(0).getOperand(0);
9300     SDValue N01 = N0.getOperand(0).getOperand(1);
9301     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9302                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
9303                        DAG.getNode(ISD::FNEG, SL, VT, N1));
9304   }
9305 
9306   // Look through FP_EXTEND nodes to do more combining.
9307   if (LookThroughFPExt) {
9308     // fold (fsub (fpext (fmul x, y)), z)
9309     //   -> (fma (fpext x), (fpext y), (fneg z))
9310     if (N0.getOpcode() == ISD::FP_EXTEND) {
9311       SDValue N00 = N0.getOperand(0);
9312       if (isContractableFMUL(N00))
9313         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9314                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9315                                        N00.getOperand(0)),
9316                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9317                                        N00.getOperand(1)),
9318                            DAG.getNode(ISD::FNEG, SL, VT, N1));
9319     }
9320 
9321     // fold (fsub x, (fpext (fmul y, z)))
9322     //   -> (fma (fneg (fpext y)), (fpext z), x)
9323     // Note: Commutes FSUB operands.
9324     if (N1.getOpcode() == ISD::FP_EXTEND) {
9325       SDValue N10 = N1.getOperand(0);
9326       if (isContractableFMUL(N10))
9327         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9328                            DAG.getNode(ISD::FNEG, SL, VT,
9329                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
9330                                                    N10.getOperand(0))),
9331                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9332                                        N10.getOperand(1)),
9333                            N0);
9334     }
9335 
9336     // fold (fsub (fpext (fneg (fmul, x, y))), z)
9337     //   -> (fneg (fma (fpext x), (fpext y), z))
9338     // Note: This could be removed with appropriate canonicalization of the
9339     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9340     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9341     // from implementing the canonicalization in visitFSUB.
9342     if (N0.getOpcode() == ISD::FP_EXTEND) {
9343       SDValue N00 = N0.getOperand(0);
9344       if (N00.getOpcode() == ISD::FNEG) {
9345         SDValue N000 = N00.getOperand(0);
9346         if (isContractableFMUL(N000)) {
9347           return DAG.getNode(ISD::FNEG, SL, VT,
9348                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9349                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9350                                                      N000.getOperand(0)),
9351                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9352                                                      N000.getOperand(1)),
9353                                          N1));
9354         }
9355       }
9356     }
9357 
9358     // fold (fsub (fneg (fpext (fmul, x, y))), z)
9359     //   -> (fneg (fma (fpext x)), (fpext y), z)
9360     // Note: This could be removed with appropriate canonicalization of the
9361     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9362     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9363     // from implementing the canonicalization in visitFSUB.
9364     if (N0.getOpcode() == ISD::FNEG) {
9365       SDValue N00 = N0.getOperand(0);
9366       if (N00.getOpcode() == ISD::FP_EXTEND) {
9367         SDValue N000 = N00.getOperand(0);
9368         if (isContractableFMUL(N000)) {
9369           return DAG.getNode(ISD::FNEG, SL, VT,
9370                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9371                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9372                                                      N000.getOperand(0)),
9373                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9374                                                      N000.getOperand(1)),
9375                                          N1));
9376         }
9377       }
9378     }
9379 
9380   }
9381 
9382   // More folding opportunities when target permits.
9383   if (Aggressive) {
9384     // fold (fsub (fma x, y, (fmul u, v)), z)
9385     //   -> (fma x, y (fma u, v, (fneg z)))
9386     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9387     // are currently only supported on binary nodes.
9388     if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
9389         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
9390         N0.getOperand(2)->hasOneUse()) {
9391       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9392                          N0.getOperand(0), N0.getOperand(1),
9393                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9394                                      N0.getOperand(2).getOperand(0),
9395                                      N0.getOperand(2).getOperand(1),
9396                                      DAG.getNode(ISD::FNEG, SL, VT,
9397                                                  N1)));
9398     }
9399 
9400     // fold (fsub x, (fma y, z, (fmul u, v)))
9401     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
9402     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9403     // are currently only supported on binary nodes.
9404     if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
9405         isContractableFMUL(N1.getOperand(2))) {
9406       SDValue N20 = N1.getOperand(2).getOperand(0);
9407       SDValue N21 = N1.getOperand(2).getOperand(1);
9408       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9409                          DAG.getNode(ISD::FNEG, SL, VT,
9410                                      N1.getOperand(0)),
9411                          N1.getOperand(1),
9412                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9413                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
9414 
9415                                      N21, N0));
9416     }
9417 
9418     if (LookThroughFPExt) {
9419       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
9420       //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
9421       if (N0.getOpcode() == PreferredFusedOpcode) {
9422         SDValue N02 = N0.getOperand(2);
9423         if (N02.getOpcode() == ISD::FP_EXTEND) {
9424           SDValue N020 = N02.getOperand(0);
9425           if (isContractableFMUL(N020))
9426             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9427                                N0.getOperand(0), N0.getOperand(1),
9428                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9429                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9430                                                        N020.getOperand(0)),
9431                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9432                                                        N020.getOperand(1)),
9433                                            DAG.getNode(ISD::FNEG, SL, VT,
9434                                                        N1)));
9435         }
9436       }
9437 
9438       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
9439       //   -> (fma (fpext x), (fpext y),
9440       //           (fma (fpext u), (fpext v), (fneg z)))
9441       // FIXME: This turns two single-precision and one double-precision
9442       // operation into two double-precision operations, which might not be
9443       // interesting for all targets, especially GPUs.
9444       if (N0.getOpcode() == ISD::FP_EXTEND) {
9445         SDValue N00 = N0.getOperand(0);
9446         if (N00.getOpcode() == PreferredFusedOpcode) {
9447           SDValue N002 = N00.getOperand(2);
9448           if (isContractableFMUL(N002))
9449             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9450                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9451                                            N00.getOperand(0)),
9452                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9453                                            N00.getOperand(1)),
9454                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9455                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9456                                                        N002.getOperand(0)),
9457                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9458                                                        N002.getOperand(1)),
9459                                            DAG.getNode(ISD::FNEG, SL, VT,
9460                                                        N1)));
9461         }
9462       }
9463 
9464       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
9465       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
9466       if (N1.getOpcode() == PreferredFusedOpcode &&
9467         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
9468         SDValue N120 = N1.getOperand(2).getOperand(0);
9469         if (isContractableFMUL(N120)) {
9470           SDValue N1200 = N120.getOperand(0);
9471           SDValue N1201 = N120.getOperand(1);
9472           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9473                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
9474                              N1.getOperand(1),
9475                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9476                                          DAG.getNode(ISD::FNEG, SL, VT,
9477                                              DAG.getNode(ISD::FP_EXTEND, SL,
9478                                                          VT, N1200)),
9479                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9480                                                      N1201),
9481                                          N0));
9482         }
9483       }
9484 
9485       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
9486       //   -> (fma (fneg (fpext y)), (fpext z),
9487       //           (fma (fneg (fpext u)), (fpext v), x))
9488       // FIXME: This turns two single-precision and one double-precision
9489       // operation into two double-precision operations, which might not be
9490       // interesting for all targets, especially GPUs.
9491       if (N1.getOpcode() == ISD::FP_EXTEND &&
9492         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
9493         SDValue N100 = N1.getOperand(0).getOperand(0);
9494         SDValue N101 = N1.getOperand(0).getOperand(1);
9495         SDValue N102 = N1.getOperand(0).getOperand(2);
9496         if (isContractableFMUL(N102)) {
9497           SDValue N1020 = N102.getOperand(0);
9498           SDValue N1021 = N102.getOperand(1);
9499           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9500                              DAG.getNode(ISD::FNEG, SL, VT,
9501                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9502                                                      N100)),
9503                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
9504                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9505                                          DAG.getNode(ISD::FNEG, SL, VT,
9506                                              DAG.getNode(ISD::FP_EXTEND, SL,
9507                                                          VT, N1020)),
9508                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9509                                                      N1021),
9510                                          N0));
9511         }
9512       }
9513     }
9514   }
9515 
9516   return SDValue();
9517 }
9518 
/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition). Returns the fused node, or an empty
/// SDValue if no fold applies.
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
  // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
  // X is the candidate (fadd x, +/-1.0) operand; Y is the other multiplicand.
  // Without aggressive fusion, require the fadd to have a single use so it
  // dies after the combine instead of being computed twice.
  auto FuseFADD = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
      if (XC1 && XC1->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
      if (XC1 && XC1->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y));
    }
    return SDValue();
  };

  // Try both commutations of the FMUL operands.
  if (SDValue FMA = FuseFADD(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0))
    return FMA;

  // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
  // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
  // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
  // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
  // Same structure as FuseFADD, but the +/-1.0 constant may be on either side
  // of the fsub, giving four patterns.
  auto FuseFSUB = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
      if (XC0 && XC0->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                           Y);
      if (XC0 && XC0->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y));

      auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
      if (XC1 && XC1->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y));
      if (XC1 && XC1->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
    }
    return SDValue();
  };

  // Try both commutations of the FMUL operands.
  if (SDValue FMA = FuseFSUB(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0))
    return FMA;

  return SDValue();
}
9608 
9609 static bool isFMulNegTwo(SDValue &N) {
9610   if (N.getOpcode() != ISD::FMUL)
9611     return false;
9612   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
9613     return CFP->isExactlyValue(-2.0);
9614   return false;
9615 }
9616 
9617 SDValue DAGCombiner::visitFADD(SDNode *N) {
9618   SDValue N0 = N->getOperand(0);
9619   SDValue N1 = N->getOperand(1);
9620   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
9621   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
9622   EVT VT = N->getValueType(0);
9623   SDLoc DL(N);
9624   const TargetOptions &Options = DAG.getTarget().Options;
9625   const SDNodeFlags Flags = N->getFlags();
9626 
9627   // fold vector ops
9628   if (VT.isVector())
9629     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9630       return FoldedVOp;
9631 
9632   // fold (fadd c1, c2) -> c1 + c2
9633   if (N0CFP && N1CFP)
9634     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
9635 
9636   // canonicalize constant to RHS
9637   if (N0CFP && !N1CFP)
9638     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
9639 
9640   if (SDValue NewSel = foldBinOpIntoSelect(N))
9641     return NewSel;
9642 
9643   // fold (fadd A, (fneg B)) -> (fsub A, B)
9644   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9645       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
9646     return DAG.getNode(ISD::FSUB, DL, VT, N0,
9647                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9648 
9649   // fold (fadd (fneg A), B) -> (fsub B, A)
9650   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9651       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
9652     return DAG.getNode(ISD::FSUB, DL, VT, N1,
9653                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
9654 
9655   // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
9656   // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
9657   if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
9658       (isFMulNegTwo(N1) && N1.hasOneUse())) {
9659     bool N1IsFMul = isFMulNegTwo(N1);
9660     SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
9661     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
9662     return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
9663   }
9664 
9665   // FIXME: Auto-upgrade the target/function-level option.
9666   if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
9667     // fold (fadd A, 0) -> A
9668     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
9669       if (N1C->isZero())
9670         return N0;
9671   }
9672 
9673   // If 'unsafe math' is enabled, fold lots of things.
9674   if (Options.UnsafeFPMath) {
9675     // No FP constant should be created after legalization as Instruction
9676     // Selection pass has a hard time dealing with FP constants.
9677     bool AllowNewConst = (Level < AfterLegalizeDAG);
9678 
9679     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
9680     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
9681         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
9682       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
9683                          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
9684                                      Flags),
9685                          Flags);
9686 
9687     // If allowed, fold (fadd (fneg x), x) -> 0.0
9688     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
9689       return DAG.getConstantFP(0.0, DL, VT);
9690 
9691     // If allowed, fold (fadd x, (fneg x)) -> 0.0
9692     if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
9693       return DAG.getConstantFP(0.0, DL, VT);
9694 
9695     // We can fold chains of FADD's of the same value into multiplications.
9696     // This transform is not safe in general because we are reducing the number
9697     // of rounding steps.
9698     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
9699       if (N0.getOpcode() == ISD::FMUL) {
9700         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9701         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
9702 
9703         // (fadd (fmul x, c), x) -> (fmul x, c+1)
9704         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
9705           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9706                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9707           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
9708         }
9709 
9710         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
9711         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
9712             N1.getOperand(0) == N1.getOperand(1) &&
9713             N0.getOperand(0) == N1.getOperand(0)) {
9714           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9715                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9716           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
9717         }
9718       }
9719 
9720       if (N1.getOpcode() == ISD::FMUL) {
9721         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9722         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
9723 
9724         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
9725         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
9726           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9727                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9728           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
9729         }
9730 
9731         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
9732         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
9733             N0.getOperand(0) == N0.getOperand(1) &&
9734             N1.getOperand(0) == N0.getOperand(0)) {
9735           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9736                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9737           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
9738         }
9739       }
9740 
9741       if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
9742         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9743         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
9744         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
9745             (N0.getOperand(0) == N1)) {
9746           return DAG.getNode(ISD::FMUL, DL, VT,
9747                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
9748         }
9749       }
9750 
9751       if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
9752         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9753         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
9754         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
9755             N1.getOperand(0) == N0) {
9756           return DAG.getNode(ISD::FMUL, DL, VT,
9757                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
9758         }
9759       }
9760 
9761       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
9762       if (AllowNewConst &&
9763           N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
9764           N0.getOperand(0) == N0.getOperand(1) &&
9765           N1.getOperand(0) == N1.getOperand(1) &&
9766           N0.getOperand(0) == N1.getOperand(0)) {
9767         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
9768                            DAG.getConstantFP(4.0, DL, VT), Flags);
9769       }
9770     }
9771   } // enable-unsafe-fp-math
9772 
9773   // FADD -> FMA combines:
9774   if (SDValue Fused = visitFADDForFMACombine(N)) {
9775     AddToWorklist(Fused.getNode());
9776     return Fused;
9777   }
9778   return SDValue();
9779 }
9780 
/// Combine an FSUB node: constant folding, (fsub A, (fneg B)) -> (fadd A, B),
/// no-signed-zeros negation of the RHS, unsafe-math cancellation folds, and
/// finally FSUB -> FMA fusion.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // FIXME: Auto-upgrade the target/function-level option.
  // Requires no-signed-zeros: (fsub 0.0, +0.0) is +0.0 whereas
  // (fneg +0.0) is -0.0.
  if (Options.NoSignedZerosFPMath  || N->getFlags().hasNoSignedZeros()) {
    // (fsub 0, B) -> -B
    if (N0CFP && N0CFP->isZero()) {
      // Prefer a free negation of the expression over an explicit FNEG node.
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
    }
  }

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // (fsub A, 0) -> A
    if (N1CFP && N1CFP->isZero())
      return N0;

    // (fsub x, x) -> 0.0 (unsafe: incorrect when x is NaN or Inf)
    if (N0 == N1)
      return DAG.getConstantFP(0.0f, DL, VT);

    // (fsub x, (fadd x, y)) -> (fneg y)
    // (fsub x, (fadd y, x)) -> (fneg y)
    if (N1.getOpcode() == ISD::FADD) {
      SDValue N10 = N1->getOperand(0);
      SDValue N11 = N1->getOperand(1);

      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N11, DAG, LegalOperations);

      if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N10, DAG, LegalOperations);
    }
  }

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
9851 
/// Combine an FMUL node: constant folding, canonicalization of constants to
/// the RHS, identity/strength-reduction folds (x*1, x*2, x*-1), double-neg
/// elimination, select-based fabs patterns, and finally distributive
/// FMUL -> FMA fusion.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (Options.UnsafeFPMath) {
    // fold (fmul A, 0) -> 0 (unsafe: incorrect when A is NaN/Inf or -0.0)
    if (N1CFP && N1CFP->isZero())
      return N1;

    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);

      // Check 1: Make sure that the first operand of the inner multiply is NOT
      // a constant. Otherwise, we may induce infinite looping.
      if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
        // Check 2: Make sure that the second operand of the inner multiply and
        // the second operand of the outer multiply are constants.
        if ((N1CFP && isConstOrConstSplatFP(N01)) ||
            (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
        }
      }
    }

    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
    // during an early run of DAGCombiner can prevent folding with fmuls
    // inserted during lowering.
    if (N0.getOpcode() == ISD::FADD &&
        (N0.getOperand(0) == N0.getOperand(1)) &&
        N0.hasOneUse()) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Normalize so that Select holds the select and X the other operand.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // Only handle (setcc X, 0.0, cc) selecting between FP constants.
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        // Less-than compares are the mirrored pattern; swap the select arms
        // so the greater-than logic below handles both directions.
        std::swap(TrueOpnd, FalseOpnd);
        // Fall through
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                   DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
10003 
/// Combine an FMA node: constant folding, identity simplifications with
/// 0.0/1.0/-1.0 operands, canonicalization of a constant multiplicand to
/// operand 1, and unsafe-math reassociation with nested FMUL/FNEG nodes.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  // Scalar constants only; splat vectors are matched separately below via
  // isConstantFPBuildVectorOrConstantFP.
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // Constant fold FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  if (Options.UnsafeFPMath) {
    // (fma 0, y, z) -> z and (fma x, 0, z) -> z. Unsafe: incorrect when the
    // other multiplicand is NaN or Inf (0 * Inf is NaN).
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // (fma 1, y, z) -> (fadd y, z) and (fma x, 1, z) -> (fadd x, z).
  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  // TODO: FMA nodes should have flags that propagate to the created nodes.
  // For now, create a Flags object for use with all unsafe math transforms.
  SDNodeFlags Flags;
  Flags.setUnsafeAlgebra(true);

  if (Options.UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }
  }

  if (Options.UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  return SDValue();
}
10101 
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
//
// Returns SDValue(N, 0) when N (and its sibling divisions) were replaced via
// CombineTo, or an empty SDValue when no transform was applied.
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // The transform trades rounding behavior for speed, so it needs either the
  // global unsafe-math option or this node's allow-reciprocal flag.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  if (!UnsafeMath && !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();
  if (!MinUses || N1->use_size() < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if (Users.size() < MinUses)
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0);  // N was replaced.
}
10165 
// Combine an FDIV node.  The folds below preserve the exact value except for
// those guarded by Options.UnsafeFPMath, which may trade precision for speed
// (reciprocal multiply, rsqrt/reciprocal estimate sequences).
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  // Fast-math flags of the original node; forwarded to every node created by
  // the folds below.
  SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  // An fdiv of a select with constant arms can be folded into the select.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // X / sqrt(Y) -> X * rsqrt(Y)
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Same fold when the sqrt was computed in a narrower type and extended:
      // build the estimate in the narrow type, then extend it.
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Same fold when the sqrt was computed in a wider type and rounded,
      // reusing the original FP_ROUND's truncation flag (operand 1).
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // If the same divisor divides enough other dividends, compute its
  // reciprocal once and multiply everywhere (see combineRepeatedFPDivisors).
  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
    return CombineRepeatedDivisors;

  return SDValue();
}
10278 
10279 SDValue DAGCombiner::visitFREM(SDNode *N) {
10280   SDValue N0 = N->getOperand(0);
10281   SDValue N1 = N->getOperand(1);
10282   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10283   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10284   EVT VT = N->getValueType(0);
10285 
10286   // fold (frem c1, c2) -> fmod(c1,c2)
10287   if (N0CFP && N1CFP)
10288     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
10289 
10290   if (SDValue NewSel = foldBinOpIntoSelect(N))
10291     return NewSel;
10292 
10293   return SDValue();
10294 }
10295 
10296 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
10297   if (!DAG.getTarget().Options.UnsafeFPMath)
10298     return SDValue();
10299 
10300   SDValue N0 = N->getOperand(0);
10301   if (TLI.isFsqrtCheap(N0, DAG))
10302     return SDValue();
10303 
10304   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
10305   // For now, create a Flags object for use with all unsafe math transforms.
10306   SDNodeFlags Flags;
10307   Flags.setUnsafeAlgebra(true);
10308   return buildSqrtEstimate(N0, Flags);
10309 }
10310 
10311 /// copysign(x, fp_extend(y)) -> copysign(x, y)
10312 /// copysign(x, fp_round(y)) -> copysign(x, y)
10313 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
10314   SDValue N1 = N->getOperand(1);
10315   if ((N1.getOpcode() == ISD::FP_EXTEND ||
10316        N1.getOpcode() == ISD::FP_ROUND)) {
10317     // Do not optimize out type conversion of f128 type yet.
10318     // For some targets like x86_64, configuration is changed to keep one f128
10319     // value in one SSE register, but instruction selection cannot handle
10320     // FCOPYSIGN on SSE registers yet.
10321     EVT N1VT = N1->getValueType(0);
10322     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
10323     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
10324   }
10325   return false;
10326 }
10327 
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  // N0 supplies the magnitude, N1 supplies only the sign.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP) // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);

  if (N1CFP) {
    const APFloat &V = N1CFP->getValueAPF();
    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    // Each replacement is gated on the replacement opcode being legal (or on
    // being before legalization, when anything may still be emitted).
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                           DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
    }
  }

  // The result's sign comes only from N1, so sign-mangling wrappers around
  // the magnitude operand can be stripped:
  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);

  // copysign(x, abs(y)) -> abs(x)
  // fabs(y) is never negative, so the copied sign is known positive.
  if (N1.getOpcode() == ISD::FABS)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // copysign(x, copysign(y,z)) -> copysign(x, z)
  // The inner copysign's sign is exactly z's sign, so use z directly.
  if (N1.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  // (See the helper for the f128 caveat.)
  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));

  return SDValue();
}
10374 
10375 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
10376   SDValue N0 = N->getOperand(0);
10377   EVT VT = N->getValueType(0);
10378   EVT OpVT = N0.getValueType();
10379 
10380   // fold (sint_to_fp c1) -> c1fp
10381   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10382       // ...but only if the target supports immediate floating-point values
10383       (!LegalOperations ||
10384        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
10385     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10386 
10387   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
10388   // but UINT_TO_FP is legal on this target, try to convert.
10389   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
10390       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
10391     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
10392     if (DAG.SignBitIsZero(N0))
10393       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10394   }
10395 
10396   // The next optimizations are desirable only if SELECT_CC can be lowered.
10397   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10398     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10399     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
10400         !VT.isVector() &&
10401         (!LegalOperations ||
10402          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10403       SDLoc DL(N);
10404       SDValue Ops[] =
10405         { N0.getOperand(0), N0.getOperand(1),
10406           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10407           N0.getOperand(2) };
10408       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10409     }
10410 
10411     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
10412     //      (select_cc x, y, 1.0, 0.0,, cc)
10413     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
10414         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
10415         (!LegalOperations ||
10416          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10417       SDLoc DL(N);
10418       SDValue Ops[] =
10419         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
10420           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10421           N0.getOperand(0).getOperand(2) };
10422       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10423     }
10424   }
10425 
10426   return SDValue();
10427 }
10428 
10429 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
10430   SDValue N0 = N->getOperand(0);
10431   EVT VT = N->getValueType(0);
10432   EVT OpVT = N0.getValueType();
10433 
10434   // fold (uint_to_fp c1) -> c1fp
10435   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10436       // ...but only if the target supports immediate floating-point values
10437       (!LegalOperations ||
10438        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
10439     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10440 
10441   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
10442   // but SINT_TO_FP is legal on this target, try to convert.
10443   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
10444       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
10445     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
10446     if (DAG.SignBitIsZero(N0))
10447       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10448   }
10449 
10450   // The next optimizations are desirable only if SELECT_CC can be lowered.
10451   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10452     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10453 
10454     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
10455         (!LegalOperations ||
10456          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10457       SDLoc DL(N);
10458       SDValue Ops[] =
10459         { N0.getOperand(0), N0.getOperand(1),
10460           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10461           N0.getOperand(2) };
10462       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10463     }
10464   }
10465 
10466   return SDValue();
10467 }
10468 
// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Only interesting when the fp-to-int's operand is itself an int-to-fp.
  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  // Subtracting the signedness flag discounts the sign bit: a signed N-bit
  // integer carries only N-1 magnitude bits.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range, i.e. the FP type's mantissa
  // precision covers every value of the (narrower of the two) integer ranges.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Wider output: extend.  Sign-extend only when both sides are signed;
      // in every other allowed combination the value is non-negative.
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    // Narrower output: truncate.
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    // Same width: nothing to do but reconcile the node types.
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
10510 
10511 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
10512   SDValue N0 = N->getOperand(0);
10513   EVT VT = N->getValueType(0);
10514 
10515   // fold (fp_to_sint c1fp) -> c1
10516   if (isConstantFPBuildVectorOrConstantFP(N0))
10517     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
10518 
10519   return FoldIntToFPToInt(N, DAG);
10520 }
10521 
10522 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
10523   SDValue N0 = N->getOperand(0);
10524   EVT VT = N->getValueType(0);
10525 
10526   // fold (fp_to_uint c1fp) -> c1
10527   if (isConstantFPBuildVectorOrConstantFP(N0))
10528     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
10529 
10530   return FoldIntToFPToInt(N, DAG);
10531 }
10532 
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  // Operand 1 is the FP_ROUND "trunc" flag: a value of 1 asserts the round
  // is known value-preserving (see NIsTrunc/N0IsTrunc below).
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      // The merged round's trunc flag is 1 only when both rounds carried it.
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Rounding only the magnitude operand; the sign operand passes through.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
10588 
10589 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
10590   SDValue N0 = N->getOperand(0);
10591   EVT VT = N->getValueType(0);
10592   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10593   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10594 
10595   // fold (fp_round_inreg c1fp) -> c1fp
10596   if (N0CFP && isTypeLegal(EVT)) {
10597     SDLoc DL(N);
10598     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
10599     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
10600   }
10601 
10602   return SDValue();
10603 }
10604 
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  // FP16_TO_FP can produce the wider type directly when it is Legal for VT.
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  // (Trunc flag == 1 asserts the round was value-preserving.)  Depending on
  // how X's type compares to VT, the result is X itself, a single narrower
  // round, or a single extend.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Other users of the original load still want the narrow value: hand them
    // an fp_round of the extending load (flagged value-preserving via the
    // IntPtrConstant(1)), and redirect chain users to the new load's chain.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
10657 
10658 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
10659   SDValue N0 = N->getOperand(0);
10660   EVT VT = N->getValueType(0);
10661 
10662   // fold (fceil c1) -> fceil(c1)
10663   if (isConstantFPBuildVectorOrConstantFP(N0))
10664     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
10665 
10666   return SDValue();
10667 }
10668 
10669 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
10670   SDValue N0 = N->getOperand(0);
10671   EVT VT = N->getValueType(0);
10672 
10673   // fold (ftrunc c1) -> ftrunc(c1)
10674   if (isConstantFPBuildVectorOrConstantFP(N0))
10675     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
10676 
10677   return SDValue();
10678 }
10679 
10680 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
10681   SDValue N0 = N->getOperand(0);
10682   EVT VT = N->getValueType(0);
10683 
10684   // fold (ffloor c1) -> ffloor(c1)
10685   if (isConstantFPBuildVectorOrConstantFP(N0))
10686     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
10687 
10688   return SDValue();
10689 }
10690 
// FIXME: FNEG and FABS have a lot in common; refactor.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the operand can be negated for free (per isNegatibleForFree), use the
  // negated expression instead of a separate FNEG node.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    // Only handle a scalar integer source; the FP side may still be a
    // vector, handled by splatting a per-element sign mask below.
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      // XOR-ing the sign bit(s) of the integer image flips the FP sign(s).
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Guarded to run only after DAG legalization, and only when the
      // negated constant remains legal for the target (as an FP immediate
      // or as a legal ConstantFP node).
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}
10749 
10750 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
10751   SDValue N0 = N->getOperand(0);
10752   SDValue N1 = N->getOperand(1);
10753   EVT VT = N->getValueType(0);
10754   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10755   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10756 
10757   if (N0CFP && N1CFP) {
10758     const APFloat &C0 = N0CFP->getValueAPF();
10759     const APFloat &C1 = N1CFP->getValueAPF();
10760     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
10761   }
10762 
10763   // Canonicalize to constant on RHS.
10764   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10765      !isConstantFPBuildVectorOrConstantFP(N1))
10766     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
10767 
10768   return SDValue();
10769 }
10770 
10771 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
10772   SDValue N0 = N->getOperand(0);
10773   SDValue N1 = N->getOperand(1);
10774   EVT VT = N->getValueType(0);
10775   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10776   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10777 
10778   if (N0CFP && N1CFP) {
10779     const APFloat &C0 = N0CFP->getValueAPF();
10780     const APFloat &C1 = N1CFP->getValueAPF();
10781     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
10782   }
10783 
10784   // Canonicalize to constant on RHS.
10785   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10786      !isConstantFPBuildVectorOrConstantFP(N1))
10787     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
10788 
10789   return SDValue();
10790 }
10791 
10792 SDValue DAGCombiner::visitFABS(SDNode *N) {
10793   SDValue N0 = N->getOperand(0);
10794   EVT VT = N->getValueType(0);
10795 
10796   // fold (fabs c1) -> fabs(c1)
10797   if (isConstantFPBuildVectorOrConstantFP(N0))
10798     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10799 
10800   // fold (fabs (fabs x)) -> (fabs x)
10801   if (N0.getOpcode() == ISD::FABS)
10802     return N->getOperand(0);
10803 
10804   // fold (fabs (fneg x)) -> (fabs x)
10805   // fold (fabs (fcopysign x, y)) -> (fabs x)
10806   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
10807     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
10808 
10809   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
10810   // constant pool values.
10811   if (!TLI.isFAbsFree(VT) &&
10812       N0.getOpcode() == ISD::BITCAST &&
10813       N0.getNode()->hasOneUse()) {
10814     SDValue Int = N0.getOperand(0);
10815     EVT IntVT = Int.getValueType();
10816     if (IntVT.isInteger() && !IntVT.isVector()) {
10817       APInt SignMask;
10818       if (N0.getValueType().isVector()) {
10819         // For a vector, get a mask such as 0x7f... per scalar element
10820         // and splat it.
10821         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
10822         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10823       } else {
10824         // For a scalar, just generate 0x7f...
10825         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
10826       }
10827       SDLoc DL(N0);
10828       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
10829                         DAG.getConstant(SignMask, DL, IntVT));
10830       AddToWorklist(Int.getNode());
10831       return DAG.getBitcast(N->getValueType(0), Int);
10832     }
10833   }
10834 
10835   return SDValue();
10836 }
10837 
10838 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
10839   SDValue Chain = N->getOperand(0);
10840   SDValue N1 = N->getOperand(1);
10841   SDValue N2 = N->getOperand(2);
10842 
10843   // If N is a constant we could fold this into a fallthrough or unconditional
10844   // branch. However that doesn't happen very often in normal code, because
10845   // Instcombine/SimplifyCFG should have handled the available opportunities.
10846   // If we did this folding here, it would be necessary to update the
10847   // MachineBasicBlock CFG, which is awkward.
10848 
10849   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
10850   // on the target.
10851   if (N1.getOpcode() == ISD::SETCC &&
10852       TLI.isOperationLegalOrCustom(ISD::BR_CC,
10853                                    N1.getOperand(0).getValueType())) {
10854     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
10855                        Chain, N1.getOperand(2),
10856                        N1.getOperand(0), N1.getOperand(1), N2);
10857   }
10858 
10859   if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
10860       ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
10861        (N1.getOperand(0).hasOneUse() &&
10862         N1.getOperand(0).getOpcode() == ISD::SRL))) {
10863     SDNode *Trunc = nullptr;
10864     if (N1.getOpcode() == ISD::TRUNCATE) {
10865       // Look pass the truncate.
10866       Trunc = N1.getNode();
10867       N1 = N1.getOperand(0);
10868     }
10869 
10870     // Match this pattern so that we can generate simpler code:
10871     //
10872     //   %a = ...
10873     //   %b = and i32 %a, 2
10874     //   %c = srl i32 %b, 1
10875     //   brcond i32 %c ...
10876     //
10877     // into
10878     //
10879     //   %a = ...
10880     //   %b = and i32 %a, 2
10881     //   %c = setcc eq %b, 0
10882     //   brcond %c ...
10883     //
10884     // This applies only when the AND constant value has one bit set and the
10885     // SRL constant is equal to the log2 of the AND constant. The back-end is
10886     // smart enough to convert the result into a TEST/JMP sequence.
10887     SDValue Op0 = N1.getOperand(0);
10888     SDValue Op1 = N1.getOperand(1);
10889 
10890     if (Op0.getOpcode() == ISD::AND &&
10891         Op1.getOpcode() == ISD::Constant) {
10892       SDValue AndOp1 = Op0.getOperand(1);
10893 
10894       if (AndOp1.getOpcode() == ISD::Constant) {
10895         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
10896 
10897         if (AndConst.isPowerOf2() &&
10898             cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
10899           SDLoc DL(N);
10900           SDValue SetCC =
10901             DAG.getSetCC(DL,
10902                          getSetCCResultType(Op0.getValueType()),
10903                          Op0, DAG.getConstant(0, DL, Op0.getValueType()),
10904                          ISD::SETNE);
10905 
10906           SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
10907                                           MVT::Other, Chain, SetCC, N2);
10908           // Don't add the new BRCond into the worklist or else SimplifySelectCC
10909           // will convert it back to (X & C1) >> C2.
10910           CombineTo(N, NewBRCond, false);
10911           // Truncate is dead.
10912           if (Trunc)
10913             deleteAndRecombine(Trunc);
10914           // Replace the uses of SRL with SETCC
10915           WorklistRemover DeadNodes(*this);
10916           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
10917           deleteAndRecombine(N1.getNode());
10918           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10919         }
10920       }
10921     }
10922 
10923     if (Trunc)
10924       // Restore N1 if the above transformation doesn't match.
10925       N1 = N->getOperand(1);
10926   }
10927 
10928   // Transform br(xor(x, y)) -> br(x != y)
10929   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
10930   if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
10931     SDNode *TheXor = N1.getNode();
10932     SDValue Op0 = TheXor->getOperand(0);
10933     SDValue Op1 = TheXor->getOperand(1);
10934     if (Op0.getOpcode() == Op1.getOpcode()) {
10935       // Avoid missing important xor optimizations.
10936       if (SDValue Tmp = visitXOR(TheXor)) {
10937         if (Tmp.getNode() != TheXor) {
10938           DEBUG(dbgs() << "\nReplacing.8 ";
10939                 TheXor->dump(&DAG);
10940                 dbgs() << "\nWith: ";
10941                 Tmp.getNode()->dump(&DAG);
10942                 dbgs() << '\n');
10943           WorklistRemover DeadNodes(*this);
10944           DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
10945           deleteAndRecombine(TheXor);
10946           return DAG.getNode(ISD::BRCOND, SDLoc(N),
10947                              MVT::Other, Chain, Tmp, N2);
10948         }
10949 
10950         // visitXOR has changed XOR's operands or replaced the XOR completely,
10951         // bail out.
10952         return SDValue(N, 0);
10953       }
10954     }
10955 
10956     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
10957       bool Equal = false;
10958       if (isOneConstant(Op0) && Op0.hasOneUse() &&
10959           Op0.getOpcode() == ISD::XOR) {
10960         TheXor = Op0.getNode();
10961         Equal = true;
10962       }
10963 
10964       EVT SetCCVT = N1.getValueType();
10965       if (LegalTypes)
10966         SetCCVT = getSetCCResultType(SetCCVT);
10967       SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
10968                                    SetCCVT,
10969                                    Op0, Op1,
10970                                    Equal ? ISD::SETEQ : ISD::SETNE);
10971       // Replace the uses of XOR with SETCC
10972       WorklistRemover DeadNodes(*this);
10973       DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
10974       deleteAndRecombine(N1.getNode());
10975       return DAG.getNode(ISD::BRCOND, SDLoc(N),
10976                          MVT::Other, Chain, SetCC, N2);
10977     }
10978   }
10979 
10980   return SDValue();
10981 }
10982 
10983 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
10984 //
10985 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
10986   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
10987   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
10988 
10989   // If N is a constant we could fold this into a fallthrough or unconditional
10990   // branch. However that doesn't happen very often in normal code, because
10991   // Instcombine/SimplifyCFG should have handled the available opportunities.
10992   // If we did this folding here, it would be necessary to update the
10993   // MachineBasicBlock CFG, which is awkward.
10994 
10995   // Use SimplifySetCC to simplify SETCC's.
10996   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
10997                                CondLHS, CondRHS, CC->get(), SDLoc(N),
10998                                false);
10999   if (Simp.getNode()) AddToWorklist(Simp.getNode());
11000 
11001   // fold to a simpler setcc
11002   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
11003     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
11004                        N->getOperand(0), Simp.getOperand(2),
11005                        Simp.getOperand(0), Simp.getOperand(1),
11006                        N->getOperand(4));
11007 
11008   return SDValue();
11009 }
11010 
11011 /// Return true if 'Use' is a load or a store that uses N as its base pointer
11012 /// and that N may be folded in the load / store addressing mode.
11013 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
11014                                     SelectionDAG &DAG,
11015                                     const TargetLowering &TLI) {
11016   EVT VT;
11017   unsigned AS;
11018 
11019   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
11020     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
11021       return false;
11022     VT = LD->getMemoryVT();
11023     AS = LD->getAddressSpace();
11024   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
11025     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
11026       return false;
11027     VT = ST->getMemoryVT();
11028     AS = ST->getAddressSpace();
11029   } else
11030     return false;
11031 
11032   TargetLowering::AddrMode AM;
11033   if (N->getOpcode() == ISD::ADD) {
11034     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11035     if (Offset)
11036       // [reg +/- imm]
11037       AM.BaseOffs = Offset->getSExtValue();
11038     else
11039       // [reg +/- reg]
11040       AM.Scale = 1;
11041   } else if (N->getOpcode() == ISD::SUB) {
11042     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11043     if (Offset)
11044       // [reg +/- imm]
11045       AM.BaseOffs = -Offset->getSExtValue();
11046     else
11047       // [reg +/- reg]
11048       AM.Scale = 1;
11049   } else
11050     return false;
11051 
11052   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
11053                                    VT.getTypeForEVT(*DAG.getContext()), AS);
11054 }
11055 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
/// \returns true if \p N was replaced (the caller must not reuse N).
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Indexed forms are only selected once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Dig out the base pointer and check the target supports a pre-inc/pre-dec
  // form for this memory VT at all.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.  The same Visited/Worklist sets are
  // shared between the OtherUses scan below and the cycle check (#3), so
  // predecessor queries are not recomputed from scratch each time.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Predecessors of N cannot be rewritten to use the new chain result
      // without creating a cycle.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Any single use that is not add/sub-of-constant makes the rewrite of
      // the remaining uses unprofitable (BasePtr stays live anyway), so
      // abandon the whole OtherUses set.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // The constant operand is whichever operand of the user is not this use.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Undo the temporary swap so BasePtr/Offset again match what the target's
  // addressing-parts hook produced.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    // #3: a use of Ptr that is a predecessor of N would form a cycle once
    // Ptr is folded into the indexed node.
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.  Only a use that cannot
    // fold the add/sub (a "real" use) makes the transform save work.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    // Indexed load results are (value, new base, chain): forward the loaded
    // value (0) and the chain (2) to the old load's users.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    // Indexed store results are (new base, chain): forward the chain (1).
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Swap back so the algebra below sees BasePtr/Offset in the same roles as
  // during the OtherUses scan.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}
11283 
/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
/// \returns true if \p N was replaced (the caller must not reuse N).
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed forms are only selected once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Dig out the base pointer and check the target supports a post-inc/dec
  // form for this memory VT at all.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the memory op is the pointer's only user there is no add/sub to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Scan the pointer's other users for an add/sub that the target can turn
  // into a post-increment/decrement of this load/store.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create a indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Post-inc'ing a frame index or register copy offers no benefit; skip.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          // Indexed load results are (value, new base, chain): forward the
          // loaded value (0) and the chain (2) to the old load's users.
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          // Indexed store results are (new base, chain): forward the chain.
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
11403 
11404 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
11405 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
11406   ISD::MemIndexedMode AM = LD->getAddressingMode();
11407   assert(AM != ISD::UNINDEXED);
11408   SDValue BP = LD->getOperand(1);
11409   SDValue Inc = LD->getOperand(2);
11410 
11411   // Some backends use TargetConstants for load offsets, but don't expect
11412   // TargetConstants in general ADD nodes. We can convert these constants into
11413   // regular Constants (if the constant is not opaque).
11414   assert((Inc.getOpcode() != ISD::TargetConstant ||
11415           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
11416          "Cannot split out indexing using opaque target constants");
11417   if (Inc.getOpcode() == ISD::TargetConstant) {
11418     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
11419     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
11420                           ConstInc->getValueType(0));
11421   }
11422 
11423   unsigned Opc =
11424       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
11425   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
11426 }
11427 
/// Combine a LOAD node: delete dead loads, forward stored values, improve
/// alignment, find a better chain, and try indexed/sliced forms.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        // Users of the incoming chain may now be combinable (e.g. the
        // isomorphic-load case above); revisit them.
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The load is dead if the value (0) is unused and the updated pointer
      // (1) is either unused or can be recomputed as explicit arithmetic.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (OptLevel != CodeGenOpt::None &&
      ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      // Same address and same type: forward the stored value (store
      // operand 1) and keep the load's chain users on the store's chain.
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, PrevST->getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // getExtLoad may CSE to the existing node; only replace if it is new.
        if (NewLoad.getNode() != N)
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
11563 
11564 namespace {
11565 /// \brief Helper structure used to slice a load in smaller loads.
11566 /// Basically a slice is obtained from the following sequence:
11567 /// Origin = load Ty1, Base
11568 /// Shift = srl Ty1 Origin, CstTy Amount
11569 /// Inst = trunc Shift to Ty2
11570 ///
11571 /// Then, it will be rewritten into:
11572 /// Slice = load SliceTy, Base + SliceOffset
11573 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
11574 ///
11575 /// SliceTy is deduced from the number of bits that are actually used to
11576 /// build Inst.
11577 struct LoadedSlice {
  /// \brief Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;
    /// Various cost counters: the number of loads, truncates,
    /// cross-register-bank copies, zero extensions and shifts implied by
    /// a slicing decision.
    unsigned Loads;
    unsigned Truncates;
    unsigned CrossRegisterBanksCopies;
    unsigned ZExts;
    unsigned Shift;

    Cost(bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}

    /// \brief Get the cost of one isolated slice.
    /// A slice always costs one load, plus a zero-extension when the
    /// truncated type differs from the loaded type and the target does not
    /// extend for free.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// \brief Account for slicing gain in the current cost.
    /// Slicing provide a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    /// \brief Accumulate another cost into this one, counter by counter.
    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    /// \brief Two costs are equal when all their counters match
    /// (ForCodeSize is deliberately not compared).
    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    /// \brief Strict-weak ordering used to pick the cheaper slicing.
    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;
  // The original load instruction.
  LoadSDNode *Origin;
  // The right shift amount in bits from the original load.
  unsigned Shift;
  // The DAG from which Origin comes.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  /// \brief Build a slice description; by default every member is unbound
  /// (null / zero), describing an invalid slice.
  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
11672 
11673   /// \brief Get the bits used in a chunk of bits \p BitWidth large.
11674   /// \return Result is \p BitWidth and has used bits set to 1 and
11675   ///         not used bits set to 0.
11676   APInt getUsedBits() const {
11677     // Reproduce the trunc(lshr) sequence:
11678     // - Start from the truncated value.
11679     // - Zero extend to the desired bit width.
11680     // - Shift left.
11681     assert(Origin && "No original load to compare against.");
11682     unsigned BitWidth = Origin->getValueSizeInBits(0);
11683     assert(Inst && "This slice is not bound to an instruction");
11684     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
11685            "Extracted slice is bigger than the whole type!");
11686     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
11687     UsedBits.setAllBits();
11688     UsedBits = UsedBits.zext(BitWidth);
11689     UsedBits <<= Shift;
11690     return UsedBits;
11691   }
11692 
11693   /// \brief Get the size of the slice to be loaded in bytes.
11694   unsigned getLoadedSize() const {
11695     unsigned SliceSize = getUsedBits().countPopulation();
11696     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
11697     return SliceSize / 8;
11698   }
11699 
11700   /// \brief Get the type that will be loaded for this slice.
11701   /// Note: This may not be the final type for the slice.
11702   EVT getLoadedType() const {
11703     assert(DAG && "Missing context");
11704     LLVMContext &Ctxt = *DAG->getContext();
11705     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
11706   }
11707 
11708   /// \brief Get the alignment of the load used for this slice.
11709   unsigned getAlignment() const {
11710     unsigned Alignment = Origin->getAlignment();
11711     unsigned Offset = getOffsetFromBase();
11712     if (Offset != 0)
11713       Alignment = MinAlign(Alignment, Alignment + Offset);
11714     return Alignment;
11715   }
11716 
11717   /// \brief Check if this slice can be rewritten with legal operations.
11718   bool isLegal() const {
11719     // An invalid slice is not legal.
11720     if (!Origin || !Inst || !DAG)
11721       return false;
11722 
11723     // Offsets are for indexed load only, we do not handle that.
11724     if (!Origin->getOffset().isUndef())
11725       return false;
11726 
11727     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11728 
11729     // Check that the type is legal.
11730     EVT SliceType = getLoadedType();
11731     if (!TLI.isTypeLegal(SliceType))
11732       return false;
11733 
11734     // Check that the load is legal for this type.
11735     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
11736       return false;
11737 
11738     // Check that the offset can be computed.
11739     // 1. Check its type.
11740     EVT PtrType = Origin->getBasePtr().getValueType();
11741     if (PtrType == MVT::Untyped || PtrType.isExtended())
11742       return false;
11743 
11744     // 2. Check that it fits in the immediate.
11745     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
11746       return false;
11747 
11748     // 3. Check that the computation is legal.
11749     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
11750       return false;
11751 
11752     // Check that the zext is legal if it needs one.
11753     EVT TruncateType = Inst->getValueType(0);
11754     if (TruncateType != SliceType &&
11755         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
11756       return false;
11757 
11758     return true;
11759   }
11760 
11761   /// \brief Get the offset in bytes of this slice in the original chunk of
11762   /// bits.
11763   /// \pre DAG != nullptr.
11764   uint64_t getOffsetFromBase() const {
11765     assert(DAG && "Missing context.");
11766     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
11767     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
11768     uint64_t Offset = Shift / 8;
11769     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
11770     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
11771            "The size of the original loaded type is not a multiple of a"
11772            " byte.");
11773     // If Offset is bigger than TySizeInBytes, it means we are loading all
11774     // zeros. This should have been optimized before in the process.
11775     assert(TySizeInBytes > Offset &&
11776            "Invalid shift amount for given loaded size");
11777     if (IsBigEndian)
11778       Offset = TySizeInBytes - Offset - getLoadedSize();
11779     return Offset;
11780   }
11781 
11782   /// \brief Generate the sequence of instructions to load the slice
11783   /// represented by this object and redirect the uses of this slice to
11784   /// this new sequence of instructions.
11785   /// \pre this->Inst && this->Origin are valid Instructions and this
11786   /// object passed the legal check: LoadedSlice::isLegal returned true.
11787   /// \return The last instruction of the sequence used to load the slice.
11788   SDValue loadSlice() const {
11789     assert(Inst && Origin && "Unable to replace a non-existing slice.");
11790     const SDValue &OldBaseAddr = Origin->getBasePtr();
11791     SDValue BaseAddr = OldBaseAddr;
11792     // Get the offset in that chunk of bytes w.r.t. the endianness.
11793     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
11794     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
11795     if (Offset) {
11796       // BaseAddr = BaseAddr + Offset.
11797       EVT ArithType = BaseAddr.getValueType();
11798       SDLoc DL(Origin);
11799       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
11800                               DAG->getConstant(Offset, DL, ArithType));
11801     }
11802 
11803     // Create the type of the loaded slice according to its size.
11804     EVT SliceType = getLoadedType();
11805 
11806     // Create the load for the slice.
11807     SDValue LastInst =
11808         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
11809                      Origin->getPointerInfo().getWithOffset(Offset),
11810                      getAlignment(), Origin->getMemOperand()->getFlags());
11811     // If the final type is not the same as the loaded type, this means that
11812     // we have to pad with zero. Create a zero extend for that.
11813     EVT FinalType = Inst->getValueType(0);
11814     if (SliceType != FinalType)
11815       LastInst =
11816           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
11817     return LastInst;
11818   }
11819 
11820   /// \brief Check if this slice can be merged with an expensive cross register
11821   /// bank copy. E.g.,
11822   /// i = load i32
11823   /// f = bitcast i32 i to float
11824   bool canMergeExpensiveCrossRegisterBankCopy() const {
11825     if (!Inst || !Inst->hasOneUse())
11826       return false;
11827     SDNode *Use = *Inst->use_begin();
11828     if (Use->getOpcode() != ISD::BITCAST)
11829       return false;
11830     assert(DAG && "Missing context");
11831     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11832     EVT ResVT = Use->getValueType(0);
11833     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
11834     const TargetRegisterClass *ArgRC =
11835         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
11836     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
11837       return false;
11838 
11839     // At this point, we know that we perform a cross-register-bank copy.
11840     // Check if it is expensive.
11841     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
11842     // Assume bitcasts are cheap, unless both register classes do not
11843     // explicitly share a common sub class.
11844     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
11845       return false;
11846 
11847     // Check if it will be merged with the load.
11848     // 1. Check the alignment constraint.
11849     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
11850         ResVT.getTypeForEVT(*DAG->getContext()));
11851 
11852     if (RequiredAlignment > getAlignment())
11853       return false;
11854 
11855     // 2. Check that the load is a legal operation for that type.
11856     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
11857       return false;
11858 
11859     // 3. Check that we do not have a zext in the way.
11860     if (Inst->getValueType(0) != getLoadedType())
11861       return false;
11862 
11863     return true;
11864   }
11865 };
11866 }
11867 
11868 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
11869 /// \p UsedBits looks like 0..0 1..1 0..0.
11870 static bool areUsedBitsDense(const APInt &UsedBits) {
11871   // If all the bits are one, this is dense!
11872   if (UsedBits.isAllOnesValue())
11873     return true;
11874 
11875   // Get rid of the unused bits on the right.
11876   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
11877   // Get rid of the unused bits on the left.
11878   if (NarrowedUsedBits.countLeadingZeros())
11879     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
11880   // Check that the chunk of bits is completely used.
11881   return NarrowedUsedBits.isAllOnesValue();
11882 }
11883 
11884 /// \brief Check whether or not \p First and \p Second are next to each other
11885 /// in memory. This means that there is no hole between the bits loaded
11886 /// by \p First and the bits loaded by \p Second.
11887 static bool areSlicesNextToEachOther(const LoadedSlice &First,
11888                                      const LoadedSlice &Second) {
11889   assert(First.Origin == Second.Origin && First.Origin &&
11890          "Unable to match different memory origins.");
11891   APInt UsedBits = First.getUsedBits();
11892   assert((UsedBits & Second.getUsedBits()) == 0 &&
11893          "Slices are not supposed to overlap.");
11894   UsedBits |= Second.getUsedBits();
11895   return areUsedBitsDense(UsedBits);
11896 }
11897 
/// \brief Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // NOTE: the increment clause also performs 'First = Second', so every
  // 'continue' in the body both advances the index and slides the pairing
  // window forward by one slice.
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                                                           First = Second) {

    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // A pair has been formed: one of the two loads can be saved.
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
11959 
11960 /// \brief Check the profitability of all involved LoadedSlice.
11961 /// Currently, it is considered profitable if there is exactly two
11962 /// involved slices (1) which are (2) next to each other in memory, and
11963 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
11964 ///
11965 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
11966 /// the elements themselves.
11967 ///
11968 /// FIXME: When the cost model will be mature enough, we can relax
11969 /// constraints (1) and (2).
11970 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11971                                 const APInt &UsedBits, bool ForCodeSize) {
11972   unsigned NumberOfSlices = LoadedSlices.size();
11973   if (StressLoadSlicing)
11974     return NumberOfSlices > 1;
11975 
11976   // Check (1).
11977   if (NumberOfSlices != 2)
11978     return false;
11979 
11980   // Check (2).
11981   if (!areUsedBitsDense(UsedBits))
11982     return false;
11983 
11984   // Check (3).
11985   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
11986   // The original code has one big load.
11987   OrigCost.Loads = 1;
11988   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
11989     const LoadedSlice &LS = LoadedSlices[CurrSlice];
11990     // Accumulate the cost of all the slices.
11991     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
11992     GlobalSlicingCost += SliceCost;
11993 
11994     // Account as cost in the original configuration the gain obtained
11995     // with the current slices.
11996     OrigCost.addSliceGain(LS);
11997   }
11998 
11999   // If the target supports paired load, adjust the cost accordingly.
12000   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
12001   return OrigCost > GlobalSlicingCost;
12002 }
12003 
12004 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
12005 /// operations, split it in the various pieces being extracted.
12006 ///
12007 /// This sort of thing is introduced by SROA.
12008 /// This slicing takes care not to insert overlapping loads.
12009 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
12010 bool DAGCombiner::SliceUpLoad(SDNode *N) {
12011   if (Level < AfterLegalizeDAG)
12012     return false;
12013 
12014   LoadSDNode *LD = cast<LoadSDNode>(N);
12015   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
12016       !LD->getValueType(0).isInteger())
12017     return false;
12018 
12019   // Keep track of already used bits to detect overlapping values.
12020   // In that case, we will just abort the transformation.
12021   APInt UsedBits(LD->getValueSizeInBits(0), 0);
12022 
12023   SmallVector<LoadedSlice, 4> LoadedSlices;
12024 
12025   // Check if this load is used as several smaller chunks of bits.
12026   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
12027   // of computation for each trunc.
12028   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
12029        UI != UIEnd; ++UI) {
12030     // Skip the uses of the chain.
12031     if (UI.getUse().getResNo() != 0)
12032       continue;
12033 
12034     SDNode *User = *UI;
12035     unsigned Shift = 0;
12036 
12037     // Check if this is a trunc(lshr).
12038     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
12039         isa<ConstantSDNode>(User->getOperand(1))) {
12040       Shift = User->getConstantOperandVal(1);
12041       User = *User->use_begin();
12042     }
12043 
12044     // At this point, User is a Truncate, iff we encountered, trunc or
12045     // trunc(lshr).
12046     if (User->getOpcode() != ISD::TRUNCATE)
12047       return false;
12048 
12049     // The width of the type must be a power of 2 and greater than 8-bits.
12050     // Otherwise the load cannot be represented in LLVM IR.
12051     // Moreover, if we shifted with a non-8-bits multiple, the slice
12052     // will be across several bytes. We do not support that.
12053     unsigned Width = User->getValueSizeInBits(0);
12054     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
12055       return 0;
12056 
12057     // Build the slice for this chain of computations.
12058     LoadedSlice LS(User, LD, Shift, &DAG);
12059     APInt CurrentUsedBits = LS.getUsedBits();
12060 
12061     // Check if this slice overlaps with another.
12062     if ((CurrentUsedBits & UsedBits) != 0)
12063       return false;
12064     // Update the bits used globally.
12065     UsedBits |= CurrentUsedBits;
12066 
12067     // Check if the new slice would be legal.
12068     if (!LS.isLegal())
12069       return false;
12070 
12071     // Record the slice.
12072     LoadedSlices.push_back(LS);
12073   }
12074 
12075   // Abort slicing if it does not seem to be profitable.
12076   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
12077     return false;
12078 
12079   ++SlicedLoads;
12080 
12081   // Rewrite each chain to use an independent load.
12082   // By construction, each chain can be represented by a unique load.
12083 
12084   // Prepare the argument for the new token factor for all the slices.
12085   SmallVector<SDValue, 8> ArgChains;
12086   for (SmallVectorImpl<LoadedSlice>::const_iterator
12087            LSIt = LoadedSlices.begin(),
12088            LSItEnd = LoadedSlices.end();
12089        LSIt != LSItEnd; ++LSIt) {
12090     SDValue SliceInst = LSIt->loadSlice();
12091     CombineTo(LSIt->Inst, SliceInst, true);
12092     if (SliceInst.getOpcode() != ISD::LOAD)
12093       SliceInst = SliceInst.getOperand(0);
12094     assert(SliceInst->getOpcode() == ISD::LOAD &&
12095            "It takes more than a zext to get to the loaded slice!!");
12096     ArgChains.push_back(SliceInst.getValue(1));
12097   }
12098 
12099   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
12100                               ArgChains);
12101   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
12102   AddToWorklist(Chain.getNode());
12103   return true;
12104 }
12105 
/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
/// \return (MaskedBytes, ByteShift); (0, 0) means "no match".
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    // Look for the load among the operands of the token factor.
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  // Width of the cleared-out region in bytes; only power-of-two widths up to
  // four bytes can be rewritten as a narrow store.
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}
12179 
12180 
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
/// \param MaskInfo (number of masked bytes, byte shift), as produced by
///        CheckForMaskedLoad.
/// \return the new narrow store node, or nullptr if the rewrite is not
///         possible.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On little-endian the byte offset equals the shift; on big-endian it is
  // counted from the other end of the value.
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    // The offset access may be less aligned than the original store.
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign)
      .getNode();
}
12240 
12241 
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
/// \return the new (narrow) store on success, SDValue() otherwise.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only plain scalar stores of a single-use value are candidates.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // Beyond this point we only handle or/xor/and with a constant RHS.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    // The load and store must access the same address in the same address
    // space for the rewrite to be valid.
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, invert the mask so that the set bits of Imm mark the bits the
    // operation actually changes, same as for OR/XOR.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // If no bits (or all bits) change, there is nothing to narrow.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // All changed bits must fit in the NewBW-wide window starting at ShAmt.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // The narrowed access must still satisfy the ABI alignment of NewVT.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Redirect chain users of the old load to the narrow load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
12367 
12368 /// For a given floating point load / store pair, if the load value isn't used
12369 /// by any other operations, then consider transforming the pair to integer
12370 /// load / store operations if the target deems the transformation profitable.
12371 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
12372   StoreSDNode *ST  = cast<StoreSDNode>(N);
12373   SDValue Chain = ST->getChain();
12374   SDValue Value = ST->getValue();
12375   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
12376       Value.hasOneUse() &&
12377       Chain == SDValue(Value.getNode(), 1)) {
12378     LoadSDNode *LD = cast<LoadSDNode>(Value);
12379     EVT VT = LD->getMemoryVT();
12380     if (!VT.isFloatingPoint() ||
12381         VT != ST->getMemoryVT() ||
12382         LD->isNonTemporal() ||
12383         ST->isNonTemporal() ||
12384         LD->getPointerInfo().getAddrSpace() != 0 ||
12385         ST->getPointerInfo().getAddrSpace() != 0)
12386       return SDValue();
12387 
12388     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
12389     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
12390         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
12391         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
12392         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
12393       return SDValue();
12394 
12395     unsigned LDAlign = LD->getAlignment();
12396     unsigned STAlign = ST->getAlignment();
12397     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
12398     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
12399     if (LDAlign < ABIAlign || STAlign < ABIAlign)
12400       return SDValue();
12401 
12402     SDValue NewLD =
12403         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
12404                     LD->getPointerInfo(), LDAlign);
12405 
12406     SDValue NewST =
12407         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
12408                      ST->getPointerInfo(), STAlign);
12409 
12410     AddToWorklist(NewLD.getNode());
12411     AddToWorklist(NewST.getNode());
12412     WorklistRemover DeadNodes(*this);
12413     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
12414     ++LdStFP2Int;
12415     return NewST;
12416   }
12417 
12418   return SDValue();
12419 }
12420 
12421 // This is a helper function for visitMUL to check the profitability
12422 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
12423 // MulNode is the original multiply, AddNode is (add x, c1),
12424 // and ConstNode is c2.
12425 //
12426 // If the (add x, c1) has multiple uses, we could increase
12427 // the number of adds if we make this transformation.
12428 // It would only be worth doing this if we can remove a
12429 // multiply in the process. Check for that here.
12430 // To illustrate:
12431 //     (A + c1) * c3
12432 //     (A + c2) * c3
12433 // We're checking for cases where we have common "c3 * A" expressions.
12434 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
12435                                               SDValue &AddNode,
12436                                               SDValue &ConstNode) {
12437   APInt Val;
12438 
12439   // If the add only has one use, this would be OK to do.
12440   if (AddNode.getNode()->hasOneUse())
12441     return true;
12442 
12443   // Walk all the users of the constant with which we're multiplying.
12444   for (SDNode *Use : ConstNode->uses()) {
12445 
12446     if (Use == MulNode) // This use is the one we're on right now. Skip it.
12447       continue;
12448 
12449     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
12450       SDNode *OtherOp;
12451       SDNode *MulVar = AddNode.getOperand(0).getNode();
12452 
12453       // OtherOp is what we're multiplying against the constant.
12454       if (Use->getOperand(0) == ConstNode)
12455         OtherOp = Use->getOperand(1).getNode();
12456       else
12457         OtherOp = Use->getOperand(0).getNode();
12458 
12459       // Check to see if multiply is with the same operand of our "add".
12460       //
12461       //     ConstNode  = CONST
12462       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
12463       //     ...
12464       //     AddNode  = (A + c1)  <-- MulVar is A.
12465       //         = AddNode * ConstNode   <-- current visiting instruction.
12466       //
12467       // If we make this transformation, we will have a common
12468       // multiply (ConstNode * A) that we can save.
12469       if (OtherOp == MulVar)
12470         return true;
12471 
12472       // Now check to see if a future expansion will give us a common
12473       // multiply.
12474       //
12475       //     ConstNode  = CONST
12476       //     AddNode    = (A + c1)
12477       //     ...   = AddNode * ConstNode <-- current visiting instruction.
12478       //     ...
12479       //     OtherOp = (A + c2)
12480       //     Use     = OtherOp * ConstNode <-- visiting Use.
12481       //
12482       // If we make this transformation, we will have a common
12483       // multiply (CONST * A) after we also do the same transformation
12484       // to the "t2" instruction.
12485       if (OtherOp->getOpcode() == ISD::ADD &&
12486           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
12487           OtherOp->getOperand(0).getNode() == MulVar)
12488         return true;
12489     }
12490   }
12491 
12492   // Didn't find a case where this would be profitable.
12493   return false;
12494 }
12495 
12496 static SDValue peekThroughBitcast(SDValue V) {
12497   while (V.getOpcode() == ISD::BITCAST)
12498     V = V.getOperand(0);
12499   return V;
12500 }
12501 
12502 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
12503                                          unsigned NumStores) {
12504   SmallVector<SDValue, 8> Chains;
12505   SmallPtrSet<const SDNode *, 8> Visited;
12506   SDLoc StoreDL(StoreNodes[0].MemNode);
12507 
12508   for (unsigned i = 0; i < NumStores; ++i) {
12509     Visited.insert(StoreNodes[i].MemNode);
12510   }
12511 
12512   // don't include nodes that are children
12513   for (unsigned i = 0; i < NumStores; ++i) {
12514     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
12515       Chains.push_back(StoreNodes[i].MemNode->getChain());
12516   }
12517 
12518   assert(Chains.size() > 0 && "Chain should have generated a chain");
12519   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
12520 }
12521 
12522 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
12523     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
12524     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
12525   // Make sure we have something to merge.
12526   if (NumStores < 2)
12527     return false;
12528 
12529   // The latest Node in the DAG.
12530   SDLoc DL(StoreNodes[0].MemNode);
12531 
12532   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12533   unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
12534   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
12535 
12536   EVT StoreTy;
12537   if (UseVector) {
12538     unsigned Elts = NumStores * NumMemElts;
12539     // Get the type for the merged vector store.
12540     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12541   } else
12542     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
12543 
12544   SDValue StoredVal;
12545   if (UseVector) {
12546     if (IsConstantSrc) {
12547       SmallVector<SDValue, 8> BuildVector;
12548       for (unsigned I = 0; I != NumStores; ++I) {
12549         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
12550         SDValue Val = St->getValue();
12551         // If constant is of the wrong type, convert it now.
12552         if (MemVT != Val.getValueType()) {
12553           Val = peekThroughBitcast(Val);
12554           // Deal with constants of wrong size.
12555           if (ElementSizeBytes * 8 != Val.getValueSizeInBits()) {
12556             EVT IntMemVT =
12557                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
12558             if (auto *CFP = dyn_cast<ConstantFPSDNode>(Val))
12559               Val = DAG.getConstant(
12560                   CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(
12561                       8 * ElementSizeBytes),
12562                   SDLoc(CFP), IntMemVT);
12563             else if (auto *C = dyn_cast<ConstantSDNode>(Val))
12564               Val = DAG.getConstant(
12565                   C->getAPIntValue().zextOrTrunc(8 * ElementSizeBytes),
12566                   SDLoc(C), IntMemVT);
12567           }
12568           // Make sure correctly size type is the correct type.
12569           Val = DAG.getBitcast(MemVT, Val);
12570         }
12571         BuildVector.push_back(Val);
12572       }
12573       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
12574                                                : ISD::BUILD_VECTOR,
12575                               DL, StoreTy, BuildVector);
12576     } else {
12577       SmallVector<SDValue, 8> Ops;
12578       for (unsigned i = 0; i < NumStores; ++i) {
12579         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12580         SDValue Val = peekThroughBitcast(St->getValue());
12581         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
12582         // type MemVT. If the underlying value is not the correct
12583         // type, but it is an extraction of an appropriate vector we
12584         // can recast Val to be of the correct type. This may require
12585         // converting between EXTRACT_VECTOR_ELT and
12586         // EXTRACT_SUBVECTOR.
12587         if ((MemVT != Val.getValueType()) &&
12588             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12589              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
12590           SDValue Vec = Val.getOperand(0);
12591           EVT MemVTScalarTy = MemVT.getScalarType();
12592           // We may need to add a bitcast here to get types to line up.
12593           if (MemVTScalarTy != Vec.getValueType()) {
12594             unsigned Elts = Vec.getValueType().getSizeInBits() /
12595                             MemVTScalarTy.getSizeInBits();
12596             EVT NewVecTy =
12597                 EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
12598             Vec = DAG.getBitcast(NewVecTy, Vec);
12599           }
12600           auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
12601                                         : ISD::EXTRACT_VECTOR_ELT;
12602           Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
12603         }
12604         Ops.push_back(Val);
12605       }
12606 
12607       // Build the extracted vector elements back into a vector.
12608       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
12609                                                : ISD::BUILD_VECTOR,
12610                               DL, StoreTy, Ops);
12611     }
12612   } else {
12613     // We should always use a vector store when merging extracted vector
12614     // elements, so this path implies a store of constants.
12615     assert(IsConstantSrc && "Merged vector elements should use vector store");
12616 
12617     APInt StoreInt(SizeInBits, 0);
12618 
12619     // Construct a single integer constant which is made of the smaller
12620     // constant inputs.
12621     bool IsLE = DAG.getDataLayout().isLittleEndian();
12622     for (unsigned i = 0; i < NumStores; ++i) {
12623       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
12624       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
12625 
12626       SDValue Val = St->getValue();
12627       StoreInt <<= ElementSizeBytes * 8;
12628       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
12629         StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits);
12630       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
12631         StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
12632       } else {
12633         llvm_unreachable("Invalid constant element type");
12634       }
12635     }
12636 
12637     // Create the new Load and Store operations.
12638     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
12639   }
12640 
12641   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12642   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
12643 
12644   // make sure we use trunc store if it's necessary to be legal.
12645   SDValue NewStore;
12646   if (!UseTrunc) {
12647     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
12648                             FirstInChain->getPointerInfo(),
12649                             FirstInChain->getAlignment());
12650   } else { // Must be realized as a trunc store
12651     EVT LegalizedStoredValueTy =
12652         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
12653     unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
12654     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
12655     SDValue ExtendedStoreVal =
12656         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
12657                         LegalizedStoredValueTy);
12658     NewStore = DAG.getTruncStore(
12659         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
12660         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
12661         FirstInChain->getAlignment(),
12662         FirstInChain->getMemOperand()->getFlags());
12663   }
12664 
12665   // Replace all merged stores with the new store.
12666   for (unsigned i = 0; i < NumStores; ++i)
12667     CombineTo(StoreNodes[i].MemNode, NewStore);
12668 
12669   AddToWorklist(NewChain.getNode());
12670   return true;
12671 }
12672 
/// Collect into \p StoreNodes all stores reachable in \p St's chain
/// neighborhood that are candidates for merging with \p St: non-volatile,
/// non-indexed stores of the same kind of source value (load, constant, or
/// extracted vector element) sharing \p St's base pointer. Each candidate is
/// recorded together with its byte offset from that common base.
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
  EVT MemVT = St->getMemoryVT();

  SDValue Val = peekThroughBitcast(St->getValue());
  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return;

  // Classify the stored value; every candidate must match this
  // classification (the flags are mutually used by CandidateMatch below).
  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
  EVT LoadVT;
  if (IsLoadSrc) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
  }
  // Returns true if \p Other is a mergeable candidate. On success \p Ptr is
  // set to Other's decomposed base pointer and \p Offset to its distance
  // from BasePtr.
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    if (Other->isVolatile() || Other->isIndexed())
      return false;
    SDValue Val = peekThroughBitcast(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
        auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (IsConstantSrc) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
        return false;
    }
    if (IsExtractVecSrc) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(Val.getValueType()))
        return false;
      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };
  // We are looking for a root node which is an ancestor to all mergable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load
  // which is always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1   Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  SDNode *RootNode = (St->getChain()).getNode();

  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    // Climb above the load to its chain, then look down through sibling
    // loads for stores that hang off them (the picture above).
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    // Otherwise, scan stores chained directly to the root.
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}
12784 
12785 // We need to check that merging these stores does not cause a loop in
12786 // the DAG. Any store candidate may depend on another candidate
12787 // indirectly through its operand (we already consider dependencies
12788 // through the chain). Check in parallel by searching up from
12789 // non-chain operands of candidates.
12790 
12791 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
12792     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
12793 
12794   // FIXME: We should be able to truncate a full search of
12795   // predecessors by doing a BFS and keeping tabs the originating
12796   // stores from which worklist nodes come from in a similar way to
12797   // TokenFactor simplfication.
12798 
12799   SmallPtrSet<const SDNode *, 16> Visited;
12800   SmallVector<const SDNode *, 8> Worklist;
12801   unsigned int Max = 8192;
12802   // Search Ops of store candidates.
12803   for (unsigned i = 0; i < NumStores; ++i) {
12804     SDNode *n = StoreNodes[i].MemNode;
12805     // Potential loops may happen only through non-chain operands
12806     for (unsigned j = 1; j < n->getNumOperands(); ++j)
12807       Worklist.push_back(n->getOperand(j).getNode());
12808   }
12809   // Search through DAG. We can stop early if we find a store node.
12810   for (unsigned i = 0; i < NumStores; ++i) {
12811     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
12812                                      Max))
12813       return false;
12814     // Check if we ended early, failing conservatively if so.
12815     if (Visited.size() >= Max)
12816       return false;
12817   }
12818   return true;
12819 }
12820 
12821 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
12822   if (OptLevel == CodeGenOpt::None)
12823     return false;
12824 
12825   EVT MemVT = St->getMemoryVT();
12826   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12827   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
12828 
12829   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
12830     return false;
12831 
12832   bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
12833       Attribute::NoImplicitFloat);
12834 
12835   // This function cannot currently deal with non-byte-sized memory sizes.
12836   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
12837     return false;
12838 
12839   if (!MemVT.isSimple())
12840     return false;
12841 
12842   // Perform an early exit check. Do not bother looking at stored values that
12843   // are not constants, loads, or extracted vector elements.
12844   SDValue StoredVal = peekThroughBitcast(St->getValue());
12845   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
12846   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
12847                        isa<ConstantFPSDNode>(StoredVal);
12848   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12849                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
12850 
12851   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
12852     return false;
12853 
12854   SmallVector<MemOpLink, 8> StoreNodes;
12855   // Find potential store merge candidates by searching through chain sub-DAG
12856   getStoreMergeCandidates(St, StoreNodes);
12857 
12858   // Check if there is anything to merge.
12859   if (StoreNodes.size() < 2)
12860     return false;
12861 
12862   // Sort the memory operands according to their distance from the
12863   // base pointer.
12864   std::sort(StoreNodes.begin(), StoreNodes.end(),
12865             [](MemOpLink LHS, MemOpLink RHS) {
12866               return LHS.OffsetFromBase < RHS.OffsetFromBase;
12867             });
12868 
12869   // Store Merge attempts to merge the lowest stores. This generally
12870   // works out as if successful, as the remaining stores are checked
12871   // after the first collection of stores is merged. However, in the
12872   // case that a non-mergeable store is found first, e.g., {p[-2],
12873   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
12874   // mergeable cases. To prevent this, we prune such stores from the
12875   // front of StoreNodes here.
12876 
12877   bool RV = false;
12878   while (StoreNodes.size() > 1) {
12879     unsigned StartIdx = 0;
12880     while ((StartIdx + 1 < StoreNodes.size()) &&
12881            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
12882                StoreNodes[StartIdx + 1].OffsetFromBase)
12883       ++StartIdx;
12884 
12885     // Bail if we don't have enough candidates to merge.
12886     if (StartIdx + 1 >= StoreNodes.size())
12887       return RV;
12888 
12889     if (StartIdx)
12890       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
12891 
12892     // Scan the memory operations on the chain and find the first
12893     // non-consecutive store memory address.
12894     unsigned NumConsecutiveStores = 1;
12895     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
12896     // Check that the addresses are consecutive starting from the second
12897     // element in the list of stores.
12898     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
12899       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
12900       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12901         break;
12902       NumConsecutiveStores = i + 1;
12903     }
12904 
12905     if (NumConsecutiveStores < 2) {
12906       StoreNodes.erase(StoreNodes.begin(),
12907                        StoreNodes.begin() + NumConsecutiveStores);
12908       continue;
12909     }
12910 
12911     // Check that we can merge these candidates without causing a cycle
12912     if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
12913                                                   NumConsecutiveStores)) {
12914       StoreNodes.erase(StoreNodes.begin(),
12915                        StoreNodes.begin() + NumConsecutiveStores);
12916       continue;
12917     }
12918 
12919     // The node with the lowest store address.
12920     LLVMContext &Context = *DAG.getContext();
12921     const DataLayout &DL = DAG.getDataLayout();
12922 
12923     // Store the constants into memory as one consecutive store.
12924     if (IsConstantSrc) {
12925       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12926       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12927       unsigned FirstStoreAlign = FirstInChain->getAlignment();
12928       unsigned LastLegalType = 1;
12929       unsigned LastLegalVectorType = 1;
12930       bool LastIntegerTrunc = false;
12931       bool NonZero = false;
12932       unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
12933       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12934         StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
12935         SDValue StoredVal = ST->getValue();
12936         bool IsElementZero = false;
12937         if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
12938           IsElementZero = C->isNullValue();
12939         else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
12940           IsElementZero = C->getConstantFPValue()->isNullValue();
12941         if (IsElementZero) {
12942           if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
12943             FirstZeroAfterNonZero = i;
12944         }
12945         NonZero |= !IsElementZero;
12946 
12947         // Find a legal type for the constant store.
12948         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
12949         EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12950         bool IsFast = false;
12951         if (TLI.isTypeLegal(StoreTy) &&
12952             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
12953             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12954                                    FirstStoreAlign, &IsFast) &&
12955             IsFast) {
12956           LastIntegerTrunc = false;
12957           LastLegalType = i + 1;
12958           // Or check whether a truncstore is legal.
12959         } else if (TLI.getTypeAction(Context, StoreTy) ==
12960                    TargetLowering::TypePromoteInteger) {
12961           EVT LegalizedStoredValueTy =
12962               TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
12963           if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12964               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
12965               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12966                                      FirstStoreAlign, &IsFast) &&
12967               IsFast) {
12968             LastIntegerTrunc = true;
12969             LastLegalType = i + 1;
12970           }
12971         }
12972 
12973         // We only use vectors if the constant is known to be zero or the target
12974         // allows it and the function is not marked with the noimplicitfloat
12975         // attribute.
12976         if ((!NonZero ||
12977              TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
12978             !NoVectors) {
12979           // Find a legal type for the vector store.
12980           unsigned Elts = (i + 1) * NumMemElts;
12981           EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
12982           if (TLI.isTypeLegal(Ty) &&
12983               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
12984               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12985                                      FirstStoreAlign, &IsFast) &&
12986               IsFast)
12987             LastLegalVectorType = i + 1;
12988         }
12989       }
12990 
12991       bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
12992       unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
12993 
12994       // Check if we found a legal integer type that creates a meaningful merge.
12995       if (NumElem < 2) {
12996         // We know that candidate stores are in order and of correct
12997         // shape. While there is no mergeable sequence from the
12998         // beginning one may start later in the sequence. The only
12999         // reason a merge of size N could have failed where another of
13000         // the same size would not have, is if the alignment has
13001         // improved or we've dropped a non-zero value. Drop as many
13002         // candidates as we can here.
13003         unsigned NumSkip = 1;
13004         while (
13005             (NumSkip < NumConsecutiveStores) &&
13006             (NumSkip < FirstZeroAfterNonZero) &&
13007             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
13008           NumSkip++;
13009         }
13010         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13011         continue;
13012       }
13013 
13014       bool Merged = MergeStoresOfConstantsOrVecElts(
13015           StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
13016       RV |= Merged;
13017 
13018       // Remove merged stores for next iteration.
13019       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13020       continue;
13021     }
13022 
13023     // When extracting multiple vector elements, try to store them
13024     // in one vector store rather than a sequence of scalar stores.
13025     if (IsExtractVecSrc) {
13026       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13027       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13028       unsigned FirstStoreAlign = FirstInChain->getAlignment();
13029       unsigned NumStoresToMerge = 1;
13030       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13031         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13032         SDValue StVal = peekThroughBitcast(St->getValue());
13033         // This restriction could be loosened.
13034         // Bail out if any stored values are not elements extracted from a
13035         // vector. It should be possible to handle mixed sources, but load
13036         // sources need more careful handling (see the block of code below that
13037         // handles consecutive loads).
13038         if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
13039             StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13040           return RV;
13041 
13042         // Find a legal type for the vector store.
13043         unsigned Elts = (i + 1) * NumMemElts;
13044         EVT Ty =
13045             EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
13046         bool IsFast;
13047         if (TLI.isTypeLegal(Ty) &&
13048             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
13049             TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
13050                                    FirstStoreAlign, &IsFast) &&
13051             IsFast)
13052           NumStoresToMerge = i + 1;
13053       }
13054 
13055       // Check if we found a legal integer type that creates a meaningful merge.
13056       if (NumStoresToMerge < 2) {
13057         // We know that candidate stores are in order and of correct
13058         // shape. While there is no mergeable sequence from the
13059         // beginning one may start later in the sequence. The only
13060         // reason a merge of size N could have failed where another of
13061         // the same size would not have, is if the alignment has
13062         // improved. Drop as many candidates as we can here.
13063         unsigned NumSkip = 1;
13064         while ((NumSkip < NumConsecutiveStores) &&
13065                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
13066           NumSkip++;
13067 
13068         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13069         continue;
13070       }
13071 
13072       bool Merged = MergeStoresOfConstantsOrVecElts(
13073           StoreNodes, MemVT, NumStoresToMerge, false, true, false);
13074       if (!Merged) {
13075         StoreNodes.erase(StoreNodes.begin(),
13076                          StoreNodes.begin() + NumStoresToMerge);
13077         continue;
13078       }
13079       // Remove merged stores for next iteration.
13080       StoreNodes.erase(StoreNodes.begin(),
13081                        StoreNodes.begin() + NumStoresToMerge);
13082       RV = true;
13083       continue;
13084     }
13085 
13086     // Below we handle the case of multiple consecutive stores that
13087     // come from multiple consecutive loads. We merge them into a single
13088     // wide load and a single wide store.
13089 
13090     // Look for load nodes which are used by the stored values.
13091     SmallVector<MemOpLink, 8> LoadNodes;
13092 
13093     // Find acceptable loads. Loads need to have the same chain (token factor),
13094     // must not be zext, volatile, indexed, and they must be consecutive.
13095     BaseIndexOffset LdBasePtr;
13096     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13097       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13098       SDValue Val = peekThroughBitcast(St->getValue());
13099       LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
13100       if (!Ld)
13101         break;
13102 
13103       // Loads must only have one use.
13104       if (!Ld->hasNUsesOfValue(1, 0))
13105         break;
13106 
13107       // The memory operands must not be volatile.
13108       if (Ld->isVolatile() || Ld->isIndexed())
13109         break;
13110 
13111       // The stored memory type must be the same.
13112       if (Ld->getMemoryVT() != MemVT)
13113         break;
13114 
13115       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
13116       // If this is not the first ptr that we check.
13117       int64_t LdOffset = 0;
13118       if (LdBasePtr.getBase().getNode()) {
13119         // The base ptr must be the same.
13120         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
13121           break;
13122       } else {
13123         // Check that all other base pointers are the same as this one.
13124         LdBasePtr = LdPtr;
13125       }
13126 
13127       // We found a potential memory operand to merge.
13128       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
13129     }
13130 
13131     if (LoadNodes.size() < 2) {
13132       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
13133       continue;
13134     }
13135 
13136     // If we have load/store pair instructions and we only have two values,
13137     // don't bother merging.
13138     unsigned RequiredAlignment;
13139     if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
13140         StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
13141       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
13142       continue;
13143     }
13144     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13145     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13146     unsigned FirstStoreAlign = FirstInChain->getAlignment();
13147     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
13148     unsigned FirstLoadAS = FirstLoad->getAddressSpace();
13149     unsigned FirstLoadAlign = FirstLoad->getAlignment();
13150 
13151     // Scan the memory operations on the chain and find the first
13152     // non-consecutive load memory address. These variables hold the index in
13153     // the store node array.
13154     unsigned LastConsecutiveLoad = 1;
13155     // This variable refers to the size and not index in the array.
13156     unsigned LastLegalVectorType = 1;
13157     unsigned LastLegalIntegerType = 1;
13158     bool isDereferenceable = true;
13159     bool DoIntegerTruncate = false;
13160     StartAddress = LoadNodes[0].OffsetFromBase;
13161     SDValue FirstChain = FirstLoad->getChain();
13162     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
13163       // All loads must share the same chain.
13164       if (LoadNodes[i].MemNode->getChain() != FirstChain)
13165         break;
13166 
13167       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
13168       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
13169         break;
13170       LastConsecutiveLoad = i;
13171 
13172       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
13173         isDereferenceable = false;
13174 
13175       // Find a legal type for the vector store.
13176       unsigned Elts = (i + 1) * NumMemElts;
13177       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13178 
13179       bool IsFastSt, IsFastLd;
13180       if (TLI.isTypeLegal(StoreTy) &&
13181           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13182           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13183                                  FirstStoreAlign, &IsFastSt) &&
13184           IsFastSt &&
13185           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13186                                  FirstLoadAlign, &IsFastLd) &&
13187           IsFastLd) {
13188         LastLegalVectorType = i + 1;
13189       }
13190 
13191       // Find a legal type for the integer store.
13192       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
13193       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
13194       if (TLI.isTypeLegal(StoreTy) &&
13195           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13196           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13197                                  FirstStoreAlign, &IsFastSt) &&
13198           IsFastSt &&
13199           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13200                                  FirstLoadAlign, &IsFastLd) &&
13201           IsFastLd) {
13202         LastLegalIntegerType = i + 1;
13203         DoIntegerTruncate = false;
13204         // Or check whether a truncstore and extload is legal.
13205       } else if (TLI.getTypeAction(Context, StoreTy) ==
13206                  TargetLowering::TypePromoteInteger) {
13207         EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
13208         if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
13209             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
13210             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
13211                                StoreTy) &&
13212             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
13213                                StoreTy) &&
13214             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
13215             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13216                                    FirstStoreAlign, &IsFastSt) &&
13217             IsFastSt &&
13218             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13219                                    FirstLoadAlign, &IsFastLd) &&
13220             IsFastLd) {
13221           LastLegalIntegerType = i + 1;
13222           DoIntegerTruncate = true;
13223         }
13224       }
13225     }
13226 
13227     // Only use vector types if the vector type is larger than the integer type.
13228     // If they are the same, use integers.
13229     bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
13230     unsigned LastLegalType =
13231         std::max(LastLegalVectorType, LastLegalIntegerType);
13232 
13233     // We add +1 here because the LastXXX variables refer to location while
13234     // the NumElem refers to array/index size.
13235     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
13236     NumElem = std::min(LastLegalType, NumElem);
13237 
13238     if (NumElem < 2) {
13239       // We know that candidate stores are in order and of correct
13240       // shape. While there is no mergeable sequence from the
13241       // beginning one may start later in the sequence. The only
13242       // reason a merge of size N could have failed where another of
13243       // the same size would not have is if the alignment or either
13244       // the load or store has improved. Drop as many candidates as we
13245       // can here.
13246       unsigned NumSkip = 1;
13247       while ((NumSkip < LoadNodes.size()) &&
13248              (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
13249              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
13250         NumSkip++;
13251       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13252       continue;
13253     }
13254 
13255     // Find if it is better to use vectors or integers to load and store
13256     // to memory.
13257     EVT JointMemOpVT;
13258     if (UseVectorTy) {
13259       // Find a legal type for the vector store.
13260       unsigned Elts = NumElem * NumMemElts;
13261       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13262     } else {
13263       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
13264       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
13265     }
13266 
13267     SDLoc LoadDL(LoadNodes[0].MemNode);
13268     SDLoc StoreDL(StoreNodes[0].MemNode);
13269 
13270     // The merged loads are required to have the same incoming chain, so
13271     // using the first's chain is acceptable.
13272 
13273     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
13274     AddToWorklist(NewStoreChain.getNode());
13275 
13276     MachineMemOperand::Flags MMOFlags = isDereferenceable ?
13277                                           MachineMemOperand::MODereferenceable:
13278                                           MachineMemOperand::MONone;
13279 
13280     SDValue NewLoad, NewStore;
13281     if (UseVectorTy || !DoIntegerTruncate) {
13282       NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
13283                             FirstLoad->getBasePtr(),
13284                             FirstLoad->getPointerInfo(), FirstLoadAlign,
13285                             MMOFlags);
13286       NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
13287                               FirstInChain->getBasePtr(),
13288                               FirstInChain->getPointerInfo(), FirstStoreAlign);
13289     } else { // This must be the truncstore/extload case
13290       EVT ExtendedTy =
13291           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
13292       NewLoad =
13293           DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
13294                          FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
13295                          JointMemOpVT, FirstLoadAlign, MMOFlags);
13296       NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
13297                                    FirstInChain->getBasePtr(),
13298                                    FirstInChain->getPointerInfo(), JointMemOpVT,
13299                                    FirstInChain->getAlignment(),
13300                                    FirstInChain->getMemOperand()->getFlags());
13301     }
13302 
13303     // Transfer chain users from old loads to the new load.
13304     for (unsigned i = 0; i < NumElem; ++i) {
13305       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
13306       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
13307                                     SDValue(NewLoad.getNode(), 1));
13308     }
13309 
13310     // Replace the all stores with the new store. Recursively remove
13311     // corresponding value if its no longer used.
13312     for (unsigned i = 0; i < NumElem; ++i) {
13313       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
13314       CombineTo(StoreNodes[i].MemNode, NewStore);
13315       if (Val.getNode()->use_empty())
13316         recursivelyDeleteUnusedNodes(Val.getNode());
13317     }
13318 
13319     RV = true;
13320     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13321     continue;
13322   }
13323   return RV;
13324 }
13325 
13326 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
13327   SDLoc SL(ST);
13328   SDValue ReplStore;
13329 
13330   // Replace the chain to avoid dependency.
13331   if (ST->isTruncatingStore()) {
13332     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
13333                                   ST->getBasePtr(), ST->getMemoryVT(),
13334                                   ST->getMemOperand());
13335   } else {
13336     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
13337                              ST->getMemOperand());
13338   }
13339 
13340   // Create token to keep both nodes around.
13341   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
13342                               MVT::Other, ST->getChain(), ReplStore);
13343 
13344   // Make sure the new and old chains are cleaned up.
13345   AddToWorklist(Token.getNode());
13346 
13347   // Don't add users to work list.
13348   return CombineTo(ST, Token, false);
13349 }
13350 
13351 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
13352   SDValue Value = ST->getValue();
13353   if (Value.getOpcode() == ISD::TargetConstantFP)
13354     return SDValue();
13355 
13356   SDLoc DL(ST);
13357 
13358   SDValue Chain = ST->getChain();
13359   SDValue Ptr = ST->getBasePtr();
13360 
13361   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
13362 
13363   // NOTE: If the original store is volatile, this transform must not increase
13364   // the number of stores.  For example, on x86-32 an f64 can be stored in one
13365   // processor operation but an i64 (which is not legal) requires two.  So the
13366   // transform should not be done in this case.
13367 
13368   SDValue Tmp;
13369   switch (CFP->getSimpleValueType(0).SimpleTy) {
13370   default:
13371     llvm_unreachable("Unknown FP type");
13372   case MVT::f16:    // We don't do this for these yet.
13373   case MVT::f80:
13374   case MVT::f128:
13375   case MVT::ppcf128:
13376     return SDValue();
13377   case MVT::f32:
13378     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
13379         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13380       ;
13381       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
13382                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
13383                             MVT::i32);
13384       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
13385     }
13386 
13387     return SDValue();
13388   case MVT::f64:
13389     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
13390          !ST->isVolatile()) ||
13391         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
13392       ;
13393       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
13394                             getZExtValue(), SDLoc(CFP), MVT::i64);
13395       return DAG.getStore(Chain, DL, Tmp,
13396                           Ptr, ST->getMemOperand());
13397     }
13398 
13399     if (!ST->isVolatile() &&
13400         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13401       // Many FP stores are not made apparent until after legalize, e.g. for
13402       // argument passing.  Since this is so common, custom legalize the
13403       // 64-bit integer store into two 32-bit stores.
13404       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
13405       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
13406       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
13407       if (DAG.getDataLayout().isBigEndian())
13408         std::swap(Lo, Hi);
13409 
13410       unsigned Alignment = ST->getAlignment();
13411       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13412       AAMDNodes AAInfo = ST->getAAInfo();
13413 
13414       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13415                                  ST->getAlignment(), MMOFlags, AAInfo);
13416       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13417                         DAG.getConstant(4, DL, Ptr.getValueType()));
13418       Alignment = MinAlign(Alignment, 4U);
13419       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
13420                                  ST->getPointerInfo().getWithOffset(4),
13421                                  Alignment, MMOFlags, AAInfo);
13422       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13423                          St0, St1);
13424     }
13425 
13426     return SDValue();
13427   }
13428 }
13429 
/// Main combine entry point for STORE nodes: folds bitcasts through stores,
/// removes dead/no-op stores, narrows truncating stores, merges consecutive
/// stores, forms indexed stores, and rewrites FP-constant stores.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // Only fold when the target allows a store of SVT and considers the
    // bitcast-free store beneficial.
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      // The original alignment must still permit a fast access at type SVT.
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                            ST->getPointerInfo(), OrigAlign,
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment()) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // Guard against getTruncStore CSE-ing back to this same node.
        if (NewStore.getNode() != N)
          return CombineTo(ST, NewStore, true);
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // findBetterNeighborChains may have rebuilt the store; reload the chain.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter = DAG.GetDemandedBits(
        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                    ST->getMemoryVT().getScalarSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(
            Value,
            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                 ST->getMemoryVT().getScalarSizeInBits()))) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
        ST->getMemoryVT() == ST1->getMemoryVT()) {
      // If this is a store followed by a store with the same value to the same
      // location, then the store is dead/noop.
      if (ST1->getValue() == Value) {
        // The store is dead, remove it.
        return Chain;
      }

      // If this store fully overwrites its immediately preceding store to the
      // same location, and no other node is chained to that store, we can
      // effectively drop the preceding store. Do not remove stores to undef
      // as they may be used as data sinks.
      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        // ST1 is fully overwritten and can be elided. Combine with its chain
        // value.
        CombineTo(ST1, ST1->getChain());
        return SDValue();
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG
                                           : !LegalTypes) {
    for (;;) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  return ReduceLoadOpStoreWidth(N);
}
13604 
13605 /// For the instruction sequence of store below, F and I values
13606 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
13608 /// which can remove the bitwise instructions or sink them to colder places.
13609 ///
13610 ///   (store (or (zext (bitcast F to i32) to i64),
13611 ///              (shl (zext I to i64), 32)), addr)  -->
13612 ///   (store F, addr) and (store I, addr+4)
13613 ///
13614 /// Similarly, splitting for other merged store can also be beneficial, like:
13615 /// For pair of {i32, i32}, i64 store --> two i32 stores.
13616 /// For pair of {i32, i16}, i64 store --> two i32 stores.
13617 /// For pair of {i16, i16}, i32 store --> two i16 stores.
13618 /// For pair of {i16, i8},  i32 store --> two i16 stores.
13619 /// For pair of {i8, i8},   i16 store --> two i8 stores.
13620 ///
13621 /// We allow each target to determine specifically which kind of splitting is
13622 /// supported.
13623 ///
13624 /// The store patterns are commonly seen from the simple code snippet below
13625 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
13626 ///   void goo(const std::pair<int, float> &);
13627 ///   hoo() {
13628 ///     ...
13629 ///     goo(std::make_pair(tmp, ftmp));
13630 ///     ...
13631 ///   }
13632 ///
13633 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
13634   if (OptLevel == CodeGenOpt::None)
13635     return SDValue();
13636 
13637   SDValue Val = ST->getValue();
13638   SDLoc DL(ST);
13639 
13640   // Match OR operand.
13641   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
13642     return SDValue();
13643 
13644   // Match SHL operand and get Lower and Higher parts of Val.
13645   SDValue Op1 = Val.getOperand(0);
13646   SDValue Op2 = Val.getOperand(1);
13647   SDValue Lo, Hi;
13648   if (Op1.getOpcode() != ISD::SHL) {
13649     std::swap(Op1, Op2);
13650     if (Op1.getOpcode() != ISD::SHL)
13651       return SDValue();
13652   }
13653   Lo = Op2;
13654   Hi = Op1.getOperand(0);
13655   if (!Op1.hasOneUse())
13656     return SDValue();
13657 
13658   // Match shift amount to HalfValBitSize.
13659   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
13660   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
13661   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
13662     return SDValue();
13663 
13664   // Lo and Hi are zero-extended from int with size less equal than 32
13665   // to i64.
13666   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
13667       !Lo.getOperand(0).getValueType().isScalarInteger() ||
13668       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
13669       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
13670       !Hi.getOperand(0).getValueType().isScalarInteger() ||
13671       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
13672     return SDValue();
13673 
13674   // Use the EVT of low and high parts before bitcast as the input
13675   // of target query.
13676   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
13677                   ? Lo.getOperand(0).getValueType()
13678                   : Lo.getValueType();
13679   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
13680                    ? Hi.getOperand(0).getValueType()
13681                    : Hi.getValueType();
13682   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
13683     return SDValue();
13684 
13685   // Start to split store.
13686   unsigned Alignment = ST->getAlignment();
13687   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13688   AAMDNodes AAInfo = ST->getAAInfo();
13689 
13690   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
13691   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
13692   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
13693   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
13694 
13695   SDValue Chain = ST->getChain();
13696   SDValue Ptr = ST->getBasePtr();
13697   // Lower value store.
13698   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13699                              ST->getAlignment(), MMOFlags, AAInfo);
13700   Ptr =
13701       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13702                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
13703   // Higher value store.
13704   SDValue St1 =
13705       DAG.getStore(St0, DL, Hi, Ptr,
13706                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
13707                    Alignment / 2, MMOFlags, AAInfo);
13708   return St1;
13709 }
13710 
13711 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
13712   SDValue InVec = N->getOperand(0);
13713   SDValue InVal = N->getOperand(1);
13714   SDValue EltNo = N->getOperand(2);
13715   SDLoc DL(N);
13716 
13717   // If the inserted element is an UNDEF, just use the input vector.
13718   if (InVal.isUndef())
13719     return InVec;
13720 
13721   EVT VT = InVec.getValueType();
13722 
13723   // Remove redundant insertions:
13724   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
13725   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13726       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
13727     return InVec;
13728 
13729   // Check that we know which element is being inserted
13730   if (!isa<ConstantSDNode>(EltNo))
13731     return SDValue();
13732   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13733 
13734   // Canonicalize insert_vector_elt dag nodes.
13735   // Example:
13736   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
13737   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
13738   //
13739   // Do this only if the child insert_vector node has one use; also
13740   // do this only if indices are both constants and Idx1 < Idx0.
13741   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
13742       && isa<ConstantSDNode>(InVec.getOperand(2))) {
13743     unsigned OtherElt = InVec.getConstantOperandVal(2);
13744     if (Elt < OtherElt) {
13745       // Swap nodes.
13746       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
13747                                   InVec.getOperand(0), InVal, EltNo);
13748       AddToWorklist(NewOp.getNode());
13749       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
13750                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
13751     }
13752   }
13753 
13754   // If we can't generate a legal BUILD_VECTOR, exit
13755   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
13756     return SDValue();
13757 
13758   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
13759   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
13760   // vector elements.
13761   SmallVector<SDValue, 8> Ops;
13762   // Do not combine these two vectors if the output vector will not replace
13763   // the input vector.
13764   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
13765     Ops.append(InVec.getNode()->op_begin(),
13766                InVec.getNode()->op_end());
13767   } else if (InVec.isUndef()) {
13768     unsigned NElts = VT.getVectorNumElements();
13769     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
13770   } else {
13771     return SDValue();
13772   }
13773 
13774   // Insert the element
13775   if (Elt < Ops.size()) {
13776     // All the operands of BUILD_VECTOR must have the same type;
13777     // we enforce that here.
13778     EVT OpVT = Ops[0].getValueType();
13779     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
13780   }
13781 
13782   // Return the new vector
13783   return DAG.getBuildVector(VT, DL, Ops);
13784 }
13785 
13786 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
13787     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
13788   assert(!OriginalLoad->isVolatile());
13789 
13790   EVT ResultVT = EVE->getValueType(0);
13791   EVT VecEltVT = InVecVT.getVectorElementType();
13792   unsigned Align = OriginalLoad->getAlignment();
13793   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
13794       VecEltVT.getTypeForEVT(*DAG.getContext()));
13795 
13796   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
13797     return SDValue();
13798 
13799   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
13800     ISD::NON_EXTLOAD : ISD::EXTLOAD;
13801   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
13802     return SDValue();
13803 
13804   Align = NewAlign;
13805 
13806   SDValue NewPtr = OriginalLoad->getBasePtr();
13807   SDValue Offset;
13808   EVT PtrType = NewPtr.getValueType();
13809   MachinePointerInfo MPI;
13810   SDLoc DL(EVE);
13811   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
13812     int Elt = ConstEltNo->getZExtValue();
13813     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
13814     Offset = DAG.getConstant(PtrOff, DL, PtrType);
13815     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
13816   } else {
13817     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
13818     Offset = DAG.getNode(
13819         ISD::MUL, DL, PtrType, Offset,
13820         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
13821     MPI = OriginalLoad->getPointerInfo();
13822   }
13823   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
13824 
13825   // The replacement we need to do here is a little tricky: we need to
13826   // replace an extractelement of a load with a load.
13827   // Use ReplaceAllUsesOfValuesWith to do the replacement.
13828   // Note that this replacement assumes that the extractvalue is the only
13829   // use of the load; that's okay because we don't want to perform this
13830   // transformation in other cases anyway.
13831   SDValue Load;
13832   SDValue Chain;
13833   if (ResultVT.bitsGT(VecEltVT)) {
13834     // If the result type of vextract is wider than the load, then issue an
13835     // extending load instead.
13836     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
13837                                                   VecEltVT)
13838                                    ? ISD::ZEXTLOAD
13839                                    : ISD::EXTLOAD;
13840     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
13841                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
13842                           Align, OriginalLoad->getMemOperand()->getFlags(),
13843                           OriginalLoad->getAAInfo());
13844     Chain = Load.getValue(1);
13845   } else {
13846     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
13847                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
13848                        OriginalLoad->getAAInfo());
13849     Chain = Load.getValue(1);
13850     if (ResultVT.bitsLT(VecEltVT))
13851       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
13852     else
13853       Load = DAG.getBitcast(ResultVT, Load);
13854   }
13855   WorklistRemover DeadNodes(*this);
13856   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
13857   SDValue To[] = { Load, Chain };
13858   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
13859   // Since we're explicitly calling ReplaceAllUses, add the new node to the
13860   // worklist explicitly as well.
13861   AddToWorklist(Load.getNode());
13862   AddUsersToWorklist(Load.getNode()); // Add users too
13863   // Make sure to revisit this node to clean it up; it will usually be dead.
13864   AddToWorklist(EVE);
13865   ++OpsNarrowed;
13866   return SDValue(EVE, 0);
13867 }
13868 
/// Combine an EXTRACT_VECTOR_ELT node. The folds attempted, in order:
/// extract of undef -> undef; extract of scalar_to_vector -> the scalar;
/// extract of build_vector with constant index -> the selected operand;
/// extract of the low-in-memory element of a bitcast scalar integer ->
/// truncate; extract at the same index as a preceding insert -> the inserted
/// value; extract through a vector_shuffle -> extract from the underlying
/// input; finally, narrowing of a feeding vector load into a scalar load.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  // Extracting from an undef vector yields undef.
  if (InVec.isUndef())
    return DAG.getUNDEF(NVT);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

  // extract_vector_elt (build_vector x, y), 1 -> y
  if (ConstEltNo &&
      InVec.getOpcode() == ISD::BUILD_VECTOR &&
      TLI.isTypeLegal(VT) &&
      (InVec.hasOneUse() ||
       TLI.aggressivelyPreferBuildVectorSources(VT))) {
    SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (NVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
  // EltTrunc is the element holding the scalar's low bits in memory order
  // (element 0 little-endian, last element big-endian), so the extract is
  // exactly a truncate of the scalar bitcast source.
  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
      ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
    SDValue BCSrc = InVec.getOperand(0);
    if (BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
  }

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      EltNo == InVec.getOperand(2)) {
    SDValue Elt = InVec.getOperand(1);
    return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
  }

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from. Mask values >= NumElem
    // refer to the shuffle's second input.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      // The shuffle input is a build_vector: take the scalar directly,
      // adjusting for a possible scalar-type mismatch as above.
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations ||
        // FIXME: Should really be just isOperationLegalOrCustom.
        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                         DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
    }
  }

  // Everything below tries to replace the extract with a narrowed scalar
  // load of the element.
  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    // Look through the bitcast at the underlying vector.
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  // Variable-index case: only before legalization, and only when the index
  // computation does not depend on the load (which would create a cycle).
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
      if (!OrigLoad->isVolatile()) {
        return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                             OrigLoad);
      }
    }
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      // NOTE(review): the guard uses 'Elt > (int)NumElems', so Elt == NumElems
      // still reaches getMaskElt — presumably '>=' was intended; confirm.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Re-base the index into whichever shuffle input was selected.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}
14084 
14085 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
14086 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
14087   // We perform this optimization post type-legalization because
14088   // the type-legalizer often scalarizes integer-promoted vectors.
14089   // Performing this optimization before may create bit-casts which
14090   // will be type-legalized to complex code sequences.
14091   // We perform this optimization only before the operation legalizer because we
14092   // may introduce illegal operations.
14093   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
14094     return SDValue();
14095 
14096   unsigned NumInScalars = N->getNumOperands();
14097   SDLoc DL(N);
14098   EVT VT = N->getValueType(0);
14099 
14100   // Check to see if this is a BUILD_VECTOR of a bunch of values
14101   // which come from any_extend or zero_extend nodes. If so, we can create
14102   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
14103   // optimizations. We do not handle sign-extend because we can't fill the sign
14104   // using shuffles.
14105   EVT SourceType = MVT::Other;
14106   bool AllAnyExt = true;
14107 
14108   for (unsigned i = 0; i != NumInScalars; ++i) {
14109     SDValue In = N->getOperand(i);
14110     // Ignore undef inputs.
14111     if (In.isUndef()) continue;
14112 
14113     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
14114     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
14115 
14116     // Abort if the element is not an extension.
14117     if (!ZeroExt && !AnyExt) {
14118       SourceType = MVT::Other;
14119       break;
14120     }
14121 
14122     // The input is a ZeroExt or AnyExt. Check the original type.
14123     EVT InTy = In.getOperand(0).getValueType();
14124 
14125     // Check that all of the widened source types are the same.
14126     if (SourceType == MVT::Other)
14127       // First time.
14128       SourceType = InTy;
14129     else if (InTy != SourceType) {
14130       // Multiple income types. Abort.
14131       SourceType = MVT::Other;
14132       break;
14133     }
14134 
14135     // Check if all of the extends are ANY_EXTENDs.
14136     AllAnyExt &= AnyExt;
14137   }
14138 
14139   // In order to have valid types, all of the inputs must be extended from the
14140   // same source type and all of the inputs must be any or zero extend.
14141   // Scalar sizes must be a power of two.
14142   EVT OutScalarTy = VT.getScalarType();
14143   bool ValidTypes = SourceType != MVT::Other &&
14144                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
14145                  isPowerOf2_32(SourceType.getSizeInBits());
14146 
14147   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
14148   // turn into a single shuffle instruction.
14149   if (!ValidTypes)
14150     return SDValue();
14151 
14152   bool isLE = DAG.getDataLayout().isLittleEndian();
14153   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
14154   assert(ElemRatio > 1 && "Invalid element size ratio");
14155   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
14156                                DAG.getConstant(0, DL, SourceType);
14157 
14158   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
14159   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
14160 
14161   // Populate the new build_vector
14162   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
14163     SDValue Cast = N->getOperand(i);
14164     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
14165             Cast.getOpcode() == ISD::ZERO_EXTEND ||
14166             Cast.isUndef()) && "Invalid cast opcode");
14167     SDValue In;
14168     if (Cast.isUndef())
14169       In = DAG.getUNDEF(SourceType);
14170     else
14171       In = Cast->getOperand(0);
14172     unsigned Index = isLE ? (i * ElemRatio) :
14173                             (i * ElemRatio + (ElemRatio - 1));
14174 
14175     assert(Index < Ops.size() && "Invalid index");
14176     Ops[Index] = In;
14177   }
14178 
14179   // The type of the new BUILD_VECTOR node.
14180   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
14181   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
14182          "Invalid vector size");
14183   // Check if the new vector type is legal.
14184   if (!isTypeLegal(VecVT)) return SDValue();
14185 
14186   // Make the new BUILD_VECTOR.
14187   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
14188 
14189   // The new BUILD_VECTOR node has the potential to be further optimized.
14190   AddToWorklist(BV.getNode());
14191   // Bitcast to the desired type.
14192   return DAG.getBitcast(VT, BV);
14193 }
14194 
14195 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
14196   EVT VT = N->getValueType(0);
14197 
14198   unsigned NumInScalars = N->getNumOperands();
14199   SDLoc DL(N);
14200 
14201   EVT SrcVT = MVT::Other;
14202   unsigned Opcode = ISD::DELETED_NODE;
14203   unsigned NumDefs = 0;
14204 
14205   for (unsigned i = 0; i != NumInScalars; ++i) {
14206     SDValue In = N->getOperand(i);
14207     unsigned Opc = In.getOpcode();
14208 
14209     if (Opc == ISD::UNDEF)
14210       continue;
14211 
14212     // If all scalar values are floats and converted from integers.
14213     if (Opcode == ISD::DELETED_NODE &&
14214         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
14215       Opcode = Opc;
14216     }
14217 
14218     if (Opc != Opcode)
14219       return SDValue();
14220 
14221     EVT InVT = In.getOperand(0).getValueType();
14222 
14223     // If all scalar values are typed differently, bail out. It's chosen to
14224     // simplify BUILD_VECTOR of integer types.
14225     if (SrcVT == MVT::Other)
14226       SrcVT = InVT;
14227     if (SrcVT != InVT)
14228       return SDValue();
14229     NumDefs++;
14230   }
14231 
14232   // If the vector has just one element defined, it's not worth to fold it into
14233   // a vectorized one.
14234   if (NumDefs < 2)
14235     return SDValue();
14236 
14237   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
14238          && "Should only handle conversion from integer to float.");
14239   assert(SrcVT != MVT::Other && "Cannot determine source type!");
14240 
14241   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
14242 
14243   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
14244     return SDValue();
14245 
14246   // Just because the floating-point vector type is legal does not necessarily
14247   // mean that the corresponding integer vector type is.
14248   if (!isTypeLegal(NVT))
14249     return SDValue();
14250 
14251   SmallVector<SDValue, 8> Opnds;
14252   for (unsigned i = 0; i != NumInScalars; ++i) {
14253     SDValue In = N->getOperand(i);
14254 
14255     if (In.isUndef())
14256       Opnds.push_back(DAG.getUNDEF(SrcVT));
14257     else
14258       Opnds.push_back(In.getOperand(0));
14259   }
14260   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
14261   AddToWorklist(BV.getNode());
14262 
14263   return DAG.getNode(Opcode, DL, VT, BV);
14264 }
14265 
/// Helper for reduceBuildVecToShuffle: build a vector_shuffle producing the
/// elements of BUILD_VECTOR node \p N that come from the input-vector pair
/// (VecIn1, VecIn2).
///
/// \param VectorMask Maps each element of N to the number of the input
///        vector it is extracted from (-1 = undef, 0 = zero vector,
///        positive = input vector index).
/// \param VecIn2     May be a null SDValue when only one input is needed.
/// \param LeftIdx    The input-vector number treated as the shuffle's first
///        operand; LeftIdx + 1 is treated as the second.
/// \returns The shuffle node, or an empty SDValue if the input/output vector
///          types cannot be reconciled.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned Vec2Offset = 0;
  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // In case both the input vectors are extracted from same base
  // vector we do not need extra addend (Vec2Offset) while
  // computing shuffle mask.
  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
    Vec2Offset = InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      // The first input is exactly twice as wide as the output.
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        // Shuffle at the wider width; the result is extracted below.
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // The second input is half the output width: widen it with undefs.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // Skip undef (-1) and zero-vector (0) elements; only elements from the
    // (LeftIdx, LeftIdx + 1) pair are placed by this shuffle.
    if (VectorMask[i] <= 0)
      continue;

    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
14386 
14387 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
14388 // operations. If the types of the vectors we're extracting from allow it,
14389 // turn this into a vector_shuffle node.
14390 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
14391   SDLoc DL(N);
14392   EVT VT = N->getValueType(0);
14393 
14394   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
14395   if (!isTypeLegal(VT))
14396     return SDValue();
14397 
14398   // May only combine to shuffle after legalize if shuffle is legal.
14399   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
14400     return SDValue();
14401 
14402   bool UsesZeroVector = false;
14403   unsigned NumElems = N->getNumOperands();
14404 
14405   // Record, for each element of the newly built vector, which input vector
14406   // that element comes from. -1 stands for undef, 0 for the zero vector,
14407   // and positive values for the input vectors.
14408   // VectorMask maps each element to its vector number, and VecIn maps vector
14409   // numbers to their initial SDValues.
14410 
14411   SmallVector<int, 8> VectorMask(NumElems, -1);
14412   SmallVector<SDValue, 8> VecIn;
14413   VecIn.push_back(SDValue());
14414 
14415   for (unsigned i = 0; i != NumElems; ++i) {
14416     SDValue Op = N->getOperand(i);
14417 
14418     if (Op.isUndef())
14419       continue;
14420 
14421     // See if we can use a blend with a zero vector.
14422     // TODO: Should we generalize this to a blend with an arbitrary constant
14423     // vector?
14424     if (isNullConstant(Op) || isNullFPConstant(Op)) {
14425       UsesZeroVector = true;
14426       VectorMask[i] = 0;
14427       continue;
14428     }
14429 
14430     // Not an undef or zero. If the input is something other than an
14431     // EXTRACT_VECTOR_ELT with a constant index, bail out.
14432     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14433         !isa<ConstantSDNode>(Op.getOperand(1)))
14434       return SDValue();
14435     SDValue ExtractedFromVec = Op.getOperand(0);
14436 
14437     // All inputs must have the same element type as the output.
14438     if (VT.getVectorElementType() !=
14439         ExtractedFromVec.getValueType().getVectorElementType())
14440       return SDValue();
14441 
14442     // Have we seen this input vector before?
14443     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
14444     // a map back from SDValues to numbers isn't worth it.
14445     unsigned Idx = std::distance(
14446         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
14447     if (Idx == VecIn.size())
14448       VecIn.push_back(ExtractedFromVec);
14449 
14450     VectorMask[i] = Idx;
14451   }
14452 
14453   // If we didn't find at least one input vector, bail out.
14454   if (VecIn.size() < 2)
14455     return SDValue();
14456 
14457   // If all the Operands of BUILD_VECTOR extract from same
14458   // vector, then split the vector efficiently based on the maximum
14459   // vector access index and adjust the VectorMask and
14460   // VecIn accordingly.
14461   if (VecIn.size() == 2) {
14462     unsigned MaxIndex = 0;
14463     unsigned NearestPow2 = 0;
14464     SDValue Vec = VecIn.back();
14465     EVT InVT = Vec.getValueType();
14466     MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
14467     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
14468 
14469     for (unsigned i = 0; i < NumElems; i++) {
14470       if (VectorMask[i] <= 0)
14471         continue;
14472       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
14473       IndexVec[i] = Index;
14474       MaxIndex = std::max(MaxIndex, Index);
14475     }
14476 
14477     NearestPow2 = PowerOf2Ceil(MaxIndex);
14478     if (InVT.isSimple() && (NearestPow2 > 2) &&
14479         ((NumElems * 2) < NearestPow2)) {
14480       unsigned SplitSize = NearestPow2 / 2;
14481       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
14482                                      InVT.getVectorElementType(), SplitSize);
14483       if (TLI.isTypeLegal(SplitVT)) {
14484         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
14485                                      DAG.getConstant(SplitSize, DL, IdxTy));
14486         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
14487                                      DAG.getConstant(0, DL, IdxTy));
14488         VecIn.pop_back();
14489         VecIn.push_back(VecIn1);
14490         VecIn.push_back(VecIn2);
14491 
14492         for (unsigned i = 0; i < NumElems; i++) {
14493           if (VectorMask[i] <= 0)
14494             continue;
14495           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
14496         }
14497       }
14498     }
14499   }
14500 
14501   // TODO: We want to sort the vectors by descending length, so that adjacent
14502   // pairs have similar length, and the longer vector is always first in the
14503   // pair.
14504 
14505   // TODO: Should this fire if some of the input vectors has illegal type (like
14506   // it does now), or should we let legalization run its course first?
14507 
14508   // Shuffle phase:
14509   // Take pairs of vectors, and shuffle them so that the result has elements
14510   // from these vectors in the correct places.
14511   // For example, given:
14512   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
14513   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
14514   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
14515   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
14516   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
14517   // We will generate:
14518   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
14519   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
14520   SmallVector<SDValue, 4> Shuffles;
14521   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
14522     unsigned LeftIdx = 2 * In + 1;
14523     SDValue VecLeft = VecIn[LeftIdx];
14524     SDValue VecRight =
14525         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
14526 
14527     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
14528                                                 VecRight, LeftIdx))
14529       Shuffles.push_back(Shuffle);
14530     else
14531       return SDValue();
14532   }
14533 
14534   // If we need the zero vector as an "ingredient" in the blend tree, add it
14535   // to the list of shuffles.
14536   if (UsesZeroVector)
14537     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
14538                                       : DAG.getConstantFP(0.0, DL, VT));
14539 
14540   // If we only have one shuffle, we're done.
14541   if (Shuffles.size() == 1)
14542     return Shuffles[0];
14543 
14544   // Update the vector mask to point to the post-shuffle vectors.
14545   for (int &Vec : VectorMask)
14546     if (Vec == 0)
14547       Vec = Shuffles.size() - 1;
14548     else
14549       Vec = (Vec - 1) / 2;
14550 
14551   // More than one shuffle. Generate a binary tree of blends, e.g. if from
14552   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
14553   // generate:
14554   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
14555   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
14556   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
14557   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
14558   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
14559   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
14560   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
14561 
14562   // Make sure the initial size of the shuffle list is even.
14563   if (Shuffles.size() % 2)
14564     Shuffles.push_back(DAG.getUNDEF(VT));
14565 
14566   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
14567     if (CurSize % 2) {
14568       Shuffles[CurSize] = DAG.getUNDEF(VT);
14569       CurSize++;
14570     }
14571     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
14572       int Left = 2 * In;
14573       int Right = 2 * In + 1;
14574       SmallVector<int, 8> Mask(NumElems, -1);
14575       for (unsigned i = 0; i != NumElems; ++i) {
14576         if (VectorMask[i] == Left) {
14577           Mask[i] = i;
14578           VectorMask[i] = In;
14579         } else if (VectorMask[i] == Right) {
14580           Mask[i] = i + NumElems;
14581           VectorMask[i] = In;
14582         }
14583       }
14584 
14585       Shuffles[In] =
14586           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
14587     }
14588   }
14589   return Shuffles[0];
14590 }
14591 
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations which can be matched to a truncate or to a shuffle-truncate.
SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) {
  // TODO: Add support for big-endian.
  if (DAG.getDataLayout().isBigEndian())
    return SDValue();
  // Need at least two elements for a strided-extract pattern to exist.
  if (N->getNumOperands() < 2)
    return SDValue();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  unsigned NumElems = N->getNumOperands();

  if (!isTypeLegal(VT))
    return SDValue();

  // If the input is something other than an EXTRACT_VECTOR_ELT with a constant
  // index, bail out.
  // TODO: Allow undef elements in some cases?
  if (any_of(N->ops(), [VT](SDValue Op) {
        return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
               !isa<ConstantSDNode>(Op.getOperand(1)) ||
               Op.getValueType() != VT.getVectorElementType();
      }))
    return SDValue();

  // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index
  auto GetExtractIdx = [](SDValue Extract) {
    return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue();
  };

  // The offset is defined to be the BUILD_VECTOR's first operand (assuming no
  // undef and little-endian).
  int Offset = GetExtractIdx(N->getOperand(0));

  // Compute the stride from the next operand.
  int Stride = GetExtractIdx(N->getOperand(1)) - Offset;
  SDValue ExtractedFromVec = N->getOperand(0).getOperand(0);

  // Proceed only if the stride and the types can be matched to a truncate:
  // the stride must be a power of two greater than one (a truncate keeps
  // every Stride-th lane), the source must have exactly Stride * NumElems
  // elements, and the widened lane must fit in 64 bits. Note a zero or
  // negative stride is rejected by isPowerOf2_32 after unsigned conversion.
  if ((Stride == 1 || !isPowerOf2_32(Stride)) ||
      (ExtractedFromVec.getValueType().getVectorNumElements() !=
       Stride * NumElems) ||
      (VT.getScalarSizeInBits() * Stride > 64))
    return SDValue();

  // Check remaining operands are consistent with the computed stride:
  // element i must be extracted from the same source at Stride * i + Offset.
  for (unsigned i = 1; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if ((Op.getOperand(0) != ExtractedFromVec) ||
        (GetExtractIdx(Op) != Stride * i + Offset))
      return SDValue();
  }

  SDValue Res = ExtractedFromVec;
  // TRUNCATE is an integer operation: for FP vectors, truncate in the
  // equivalent integer vector type and bitcast back to VT at the end.
  EVT TruncVT =
      VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
  if (Offset) {
    // If the first index is non-zero, need to shuffle elements of interest to
    // lower parts of the vector's elements the truncate will act upon.
    // TODO: Generalize to compute the permute-shuffle that will prepare any
    // element permutation for the truncate, and let the target decide if
    // profitable.
    EVT ExtractedVT = ExtractedFromVec.getValueType();
    SmallVector<int, 64> Mask;
    for (unsigned i = 0; i != NumElems; ++i) {
      Mask.push_back(Offset + i * Stride);
      // Pad the elements that will be lost after the truncate with undefs.
      Mask.append(Stride - 1, -1);
    }
    if (!TLI.isShuffleMaskLegal(Mask, ExtractedVT) ||
        !TLI.isDesirableToCombineBuildVectorToShuffleTruncate(Mask, ExtractedVT,
                                                              TruncVT))
      return SDValue();
    Res = DAG.getVectorShuffle(ExtractedVT, SDLoc(N), Res,
                               DAG.getUNDEF(ExtractedVT), Mask);
  }
  // Construct the truncate: view the source as NumElems lanes of width
  // ScalarSize * Stride; truncating keeps the low ScalarSize bits of each
  // lane, which are exactly the extracted elements (little-endian).
  LLVMContext &Ctx = *DAG.getContext();
  EVT NewVT = VT.getVectorVT(
      Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems);

  Res = DAG.getBitcast(NewVT, Res);
  Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res);
  return DAG.getBitcast(VT, Res);
}
14678 
14679 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
14680   EVT VT = N->getValueType(0);
14681 
14682   // A vector built entirely of undefs is undef.
14683   if (ISD::allOperandsUndef(N))
14684     return DAG.getUNDEF(VT);
14685 
14686   // Check if we can express BUILD VECTOR via subvector extract.
14687   if (!LegalTypes && (N->getNumOperands() > 1)) {
14688     SDValue Op0 = N->getOperand(0);
14689     auto checkElem = [&](SDValue Op) -> uint64_t {
14690       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
14691           (Op0.getOperand(0) == Op.getOperand(0)))
14692         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
14693           return CNode->getZExtValue();
14694       return -1;
14695     };
14696 
14697     int Offset = checkElem(Op0);
14698     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
14699       if (Offset + i != checkElem(N->getOperand(i))) {
14700         Offset = -1;
14701         break;
14702       }
14703     }
14704 
14705     if ((Offset == 0) &&
14706         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
14707       return Op0.getOperand(0);
14708     if ((Offset != -1) &&
14709         ((Offset % N->getValueType(0).getVectorNumElements()) ==
14710          0)) // IDX must be multiple of output size.
14711       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
14712                          Op0.getOperand(0), Op0.getOperand(1));
14713   }
14714 
14715   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
14716     return V;
14717 
14718   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
14719     return V;
14720 
14721   if (TLI.isDesirableToCombineBuildVectorToTruncate())
14722     if (SDValue V = reduceBuildVecToTrunc(N))
14723       return V;
14724 
14725   if (SDValue V = reduceBuildVecToShuffle(N))
14726     return V;
14727 
14728   return SDValue();
14729 }
14730 
14731 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
14732   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14733   EVT OpVT = N->getOperand(0).getValueType();
14734 
14735   // If the operands are legal vectors, leave them alone.
14736   if (TLI.isTypeLegal(OpVT))
14737     return SDValue();
14738 
14739   SDLoc DL(N);
14740   EVT VT = N->getValueType(0);
14741   SmallVector<SDValue, 8> Ops;
14742 
14743   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
14744   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14745 
14746   // Keep track of what we encounter.
14747   bool AnyInteger = false;
14748   bool AnyFP = false;
14749   for (const SDValue &Op : N->ops()) {
14750     if (ISD::BITCAST == Op.getOpcode() &&
14751         !Op.getOperand(0).getValueType().isVector())
14752       Ops.push_back(Op.getOperand(0));
14753     else if (ISD::UNDEF == Op.getOpcode())
14754       Ops.push_back(ScalarUndef);
14755     else
14756       return SDValue();
14757 
14758     // Note whether we encounter an integer or floating point scalar.
14759     // If it's neither, bail out, it could be something weird like x86mmx.
14760     EVT LastOpVT = Ops.back().getValueType();
14761     if (LastOpVT.isFloatingPoint())
14762       AnyFP = true;
14763     else if (LastOpVT.isInteger())
14764       AnyInteger = true;
14765     else
14766       return SDValue();
14767   }
14768 
14769   // If any of the operands is a floating point scalar bitcast to a vector,
14770   // use floating point types throughout, and bitcast everything.
14771   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
14772   if (AnyFP) {
14773     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
14774     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14775     if (AnyInteger) {
14776       for (SDValue &Op : Ops) {
14777         if (Op.getValueType() == SVT)
14778           continue;
14779         if (Op.isUndef())
14780           Op = ScalarUndef;
14781         else
14782           Op = DAG.getBitcast(SVT, Op);
14783       }
14784     }
14785   }
14786 
14787   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
14788                                VT.getSizeInBits() / SVT.getSizeInBits());
14789   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
14790 }
14791 
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 are the (at most two) source vectors of the final shuffle.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    // Peek through any bitcast.
    Op = peekThroughBitcast(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();

    // Peek through any bitcast.
    ExtVec = peekThroughBitcast(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    // The extract index must be constant to build the mask below.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast: ExtIdx is in
    // units of ExtVT elements and must be re-expressed in units of the
    // result's elements. Bail out if the element counts do not divide evenly
    // in either direction.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      // Lanes of the second shuffle source are numbered after the first's.
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Only emit a shuffle the target considers legal.
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}
14872 
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0)->getValueType(0).isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      // Bail on scalar types that cannot form a vector (e.g. x86mmx).
      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Need at least two elements for SCALAR_TO_VECTOR to be meaningful.
      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0)->getValueType(0);
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // An undef operand contributes NumElts undef scalars.
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          // Integer operands may differ in width; truncate each to MinVT.
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    // Operand i must read elements [i*PartNumElem, (i+1)*PartNumElem) of the
    // source for the whole concat to be a no-op.
    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
15026 
/// If we are extracting a subvector produced by a wide binary operator with
/// at least one operand that was the result of a vector concatenation, then
/// try to use the narrow vector operands directly to avoid the concatenation
/// and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
  // some of these bailouts with other transforms.

  // The extract index must be a constant, so we can map it to a concat operand.
  auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!ExtractIndex)
    return SDValue();

  // Only handle the case where we are doubling and then halving. A larger ratio
  // may require more than two narrow binops to replace the wide binop.
  EVT VT = Extract->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
         "Extract index is not a multiple of the vector length.");
  if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
    return SDValue();

  // We are looking for an optionally bitcasted wide vector binary operator
  // feeding an extract subvector.
  SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));

  // TODO: The motivating case for this transform is an x86 AVX1 target. That
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
  // flavors, but no other 256-bit integer support. This could be extended to
  // handle any binop, but that may require fixing/adding other folds to avoid
  // codegen regressions.
  unsigned BOpcode = BinOp.getOpcode();
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
    return SDValue();

  // The binop must be a vector type, so we can chop it in half.
  EVT WideBVT = BinOp.getValueType();
  if (!WideBVT.isVector())
    return SDValue();

  // Bail out if the target does not support a narrower version of the binop.
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideBVT.getVectorNumElements() / 2);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
    return SDValue();

  // Peek through bitcasts of the binary operator operands if needed.
  SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
  SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));

  // We need at least one concatenation operation of a binop operand to make
  // this transform worthwhile. The concat must double the input vector sizes.
  // TODO: Should we also handle INSERT_SUBVECTOR patterns?
  bool ConcatL =
      LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
  bool ConcatR =
      RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
  if (!ConcatL && !ConcatR)
    return SDValue();

  // If one of the binop operands was not the result of a concat, we must
  // extract a half-sized operand for our new narrow binop. We can't just reuse
  // the original extract index operand because we may have bitcasted.
  // ConcatOpNum selects which half (0 = low, 1 = high) of the wide value the
  // original extract reads; ExtBOIdx is that half's starting index expressed
  // in WideBVT element units.
  unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
  EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
  SDLoc DL(Extract);

  // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
  // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
  // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
  SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                    BinOp.getOperand(0),
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));

  SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                    BinOp.getOperand(1),
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));

  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
  return DAG.getBitcast(VT, NarrowBinOp);
}
15112 
15113 /// If we are extracting a subvector from a wide vector load, convert to a
15114 /// narrow load to eliminate the extraction:
15115 /// (extract_subvector (load wide vector)) --> (load narrow vector)
15116 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
15117   // TODO: Add support for big-endian. The offset calculation must be adjusted.
15118   if (DAG.getDataLayout().isBigEndian())
15119     return SDValue();
15120 
15121   // TODO: The one-use check is overly conservative. Check the cost of the
15122   // extract instead or remove that condition entirely.
15123   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
15124   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
15125   if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
15126       !ExtIdx)
15127     return SDValue();
15128 
15129   // The narrow load will be offset from the base address of the old load if
15130   // we are extracting from something besides index 0 (little-endian).
15131   EVT VT = Extract->getValueType(0);
15132   SDLoc DL(Extract);
15133   SDValue BaseAddr = Ld->getOperand(1);
15134   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
15135 
15136   // TODO: Use "BaseIndexOffset" to make this more effective.
15137   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
15138   MachineFunction &MF = DAG.getMachineFunction();
15139   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
15140                                                    VT.getStoreSize());
15141   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
15142   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
15143   return NewLd;
15144 }
15145 
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);

  // Extract from UNDEF is UNDEF.
  if (V.isUndef())
    return DAG.getUNDEF(NVT);

  // Try turning (extract_subvector (load)) into a narrower load.
  if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
    if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
      return NarrowLoad;

  // Combine:
  //    (extract_subvec (concat V1, V2, ...), i)
  // Into:
  //    Vi if possible
  // Only operand 0 is checked as 'concat' assumes all inputs of the same
  // type.
  if (V->getOpcode() == ISD::CONCAT_VECTORS &&
      isa<ConstantSDNode>(N->getOperand(1)) &&
      V->getOperand(0).getValueType() == NVT) {
    unsigned Idx = N->getConstantOperandVal(1);
    unsigned NumElems = NVT.getVectorNumElements();
    assert((Idx % NumElems) == 0 &&
           "IDX in concat is not a multiple of the result vector length.");
    return V->getOperand(Idx / NumElems);
  }

  // Skip bitcasting
  V = peekThroughBitcast(V);

  // If the input is a build vector. Try to make a smaller build vector.
  if (V->getOpcode() == ISD::BUILD_VECTOR) {
    if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      EVT InVT = V->getValueType(0);
      unsigned ExtractSize = NVT.getSizeInBits();
      unsigned EltSize = InVT.getScalarSizeInBits();
      // Only do this if we won't split any elements.
      if (ExtractSize % EltSize == 0) {
        unsigned NumElems = ExtractSize / EltSize;
        EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
                                         InVT.getVectorElementType(), NumElems);
        if ((!LegalOperations ||
             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) &&
            (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
          // Rescale the extract index from NVT element units to InVT element
          // units, since a bitcast may have been peeked through above.
          unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
                            EltSize;

          // Extract the pieces from the original build_vector.
          SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
                                            makeArrayRef(V->op_begin() + IdxVal,
                                                         NumElems));
          return DAG.getBitcast(NVT, BuildVec);
        }
      }
    }
  }

  if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only simple case where vector being inserted and vector
    // being extracted are of same size.
    EVT SmallVT = V->getOperand(1).getValueType();
    // NOTE(review): this returns from the whole visitor, which also skips the
    // narrowExtractedVectorBinOp combine below — presumably intentional, but
    // worth confirming.
    if (!NVT.bitsEq(SmallVT))
      return SDValue();

    // Only handle cases where both indexes are constants.
    ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
    ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));

    if (InsIdx && ExtIdx) {
      // Combine:
      //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
      // Into:
      //    indices are equal or bit offsets are equal => V1
      //    otherwise => (extract_subvec V1, ExtIdx)
      if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
          ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
        return DAG.getBitcast(NVT, V->getOperand(1));
      // Extract directly from the insert's base vector; bitcast it back to
      // the pre-peekThroughBitcast type so the original index units apply.
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
          DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
          N->getOperand(1));
    }
  }

  // Narrow a wide vector binop feeding this extract when profitable.
  if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
    return NarrowBOp;

  return SDValue();
}
15236 
15237 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
15238                                                  SDValue V, SelectionDAG &DAG) {
15239   SDLoc DL(V);
15240   EVT VT = V.getValueType();
15241 
15242   switch (V.getOpcode()) {
15243   default:
15244     return V;
15245 
15246   case ISD::CONCAT_VECTORS: {
15247     EVT OpVT = V->getOperand(0).getValueType();
15248     int OpSize = OpVT.getVectorNumElements();
15249     SmallBitVector OpUsedElements(OpSize, false);
15250     bool FoundSimplification = false;
15251     SmallVector<SDValue, 4> NewOps;
15252     NewOps.reserve(V->getNumOperands());
15253     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
15254       SDValue Op = V->getOperand(i);
15255       bool OpUsed = false;
15256       for (int j = 0; j < OpSize; ++j)
15257         if (UsedElements[i * OpSize + j]) {
15258           OpUsedElements[j] = true;
15259           OpUsed = true;
15260         }
15261       NewOps.push_back(
15262           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
15263                  : DAG.getUNDEF(OpVT));
15264       FoundSimplification |= Op == NewOps.back();
15265       OpUsedElements.reset();
15266     }
15267     if (FoundSimplification)
15268       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
15269     return V;
15270   }
15271 
15272   case ISD::INSERT_SUBVECTOR: {
15273     SDValue BaseV = V->getOperand(0);
15274     SDValue SubV = V->getOperand(1);
15275     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
15276     if (!IdxN)
15277       return V;
15278 
15279     int SubSize = SubV.getValueType().getVectorNumElements();
15280     int Idx = IdxN->getZExtValue();
15281     bool SubVectorUsed = false;
15282     SmallBitVector SubUsedElements(SubSize, false);
15283     for (int i = 0; i < SubSize; ++i)
15284       if (UsedElements[i + Idx]) {
15285         SubVectorUsed = true;
15286         SubUsedElements[i] = true;
15287         UsedElements[i + Idx] = false;
15288       }
15289 
15290     // Now recurse on both the base and sub vectors.
15291     SDValue SimplifiedSubV =
15292         SubVectorUsed
15293             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
15294             : DAG.getUNDEF(SubV.getValueType());
15295     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
15296     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
15297       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
15298                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
15299     return V;
15300   }
15301   }
15302 }
15303 
15304 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
15305                                        SDValue N1, SelectionDAG &DAG) {
15306   EVT VT = SVN->getValueType(0);
15307   int NumElts = VT.getVectorNumElements();
15308   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
15309   for (int M : SVN->getMask())
15310     if (M >= 0 && M < NumElts)
15311       N0UsedElements[M] = true;
15312     else if (M >= NumElts)
15313       N1UsedElements[M - NumElts] = true;
15314 
15315   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
15316   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
15317   if (S0 == N0 && S1 == N1)
15318     return SDValue();
15319 
15320   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
15321 }
15322 
15323 static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
15324                                    SDValue N1, SelectionDAG &DAG) {
15325   auto isUndefElt = [](SDValue V, int Idx) {
15326     // TODO - handle more cases as required.
15327     if (V.getOpcode() == ISD::BUILD_VECTOR)
15328       return V.getOperand(Idx).isUndef();
15329     return false;
15330   };
15331 
15332   EVT VT = SVN->getValueType(0);
15333   unsigned NumElts = VT.getVectorNumElements();
15334 
15335   bool Changed = false;
15336   SmallVector<int, 8> NewMask;
15337   for (unsigned i = 0; i != NumElts; ++i) {
15338     int Idx = SVN->getMaskElt(i);
15339     if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) ||
15340         ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) {
15341       Changed = true;
15342       Idx = -1;
15343     }
15344     NewMask.push_back(Idx);
15345   }
15346   if (Changed)
15347     return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask);
15348 
15349   return SDValue();
15350 }
15351 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
// Returns SDValue() when no such partition exists. The caller (see
// visitVECTOR_SHUFFLE) guarantees N0 is a CONCAT_VECTORS and N1 is either
// undef or a CONCAT_VECTORS with the same subvector type.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
                  SVN->getMask().end(), [](int i) { return i == -1; })) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                              makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;
    // Classify this subvector-sized mask chunk: entirely undef, entirely
    // defined, or mixed.
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      // The chunk must start on a subvector boundary of the source...
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      // ...and be a run of consecutive indices, i.e. a verbatim copy of one
      // source subvector.
      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      // Select the copied subvector from N0's operands, or from N1's if the
      // index falls past the end of N0.
      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));

    } else if (AllUndef) {
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
15415 
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load.  A BUILD_VECTOR where each
// element is identical is a splat.  A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  // Don't fold if doing so would keep the source vectors alive alongside the
  // new BUILD_VECTOR.
  if (!N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();
  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
    bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // Gather one scalar operand per result lane by chasing the mask into the
  // corresponding source's scalar operands.
  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      // Mask values >= NumElts index into N1.
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        // Only lane 0 of a SCALAR_TO_VECTOR is defined; other lanes stay
        // undef.
        if (Idx == 0)
          Op = S.getOperand(0);
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
    // fine, but it's likely to generate low-quality code if the target can't
    // reconstruct an appropriate shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }
  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
15493 
// Match shuffles that can be converted to any_vector_extend_in_reg.
// This is often generated during legalization.
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
                                            SelectionDAG &DAG,
                                            const TargetLowering &TLI,
                                            bool LegalOperations,
                                            bool LegalTypes) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  ArrayRef<int> Mask = SVN->getMask();
  SDValue N0 = SVN->getOperand(0);

  // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
  // For an extension by factor Scale, each mask element must either be undef
  // or place source element i/Scale at the front of its Scale-wide group
  // (little-endian lane layout).
  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
        continue;
      return false;
    }
    return true;
  };

  // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
  // power-of-2 extensions as they are the most likely.
  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
    if (!isAnyExtend(Scale))
      continue;

    // The extended type has Scale-times wider scalars and correspondingly
    // fewer elements; emit it only if types/operations are legal (or we are
    // in a phase that doesn't require legality yet).
    EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
    EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
    if (!LegalTypes || TLI.isTypeLegal(OutVT))
      if (!LegalOperations ||
          TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
        return DAG.getBitcast(VT,
                            DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
  }

  return SDValue();
}
15544 
// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
// each source element of a large type into the lowest elements of a smaller
// destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// then be able to remove it.
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  // Look through a bitcast to find the extend node feeding the shuffle.
  SDValue N0 = peekThroughBitcast(SVN->getOperand(0));

  // Only fire when the shuffle source is one of the *_EXTEND_VECTOR_INREG
  // nodes; the truncating shuffle then just undoes the extension.
  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  ArrayRef<int> Mask = SVN->getMask();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();

  // The extension must have widened scalars by an integral factor.
  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
    return SDValue();
  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;

  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
  // A truncation by factor Scale gathers every Scale'th (low) element into
  // the front of the vector; undef mask elements are permitted.
  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
        continue;
      return false;
    }
    return true;
  };

  // At the moment we just handle the case where we've truncated back to the
  // same size as before the extension.
  // TODO: handle more extension/truncation cases as cases arise.
  if (EltSizeInBits != ExtSrcSizeInBits)
    return SDValue();

  // We can remove *extend_vector_inreg only if the truncation happens at
  // the same scale as the extension.
  if (isTruncate(ExtScale))
    return DAG.getBitcast(VT, N00);

  return SDValue();
}
15605 
15606 // Combine shuffles of splat-shuffles of the form:
15607 // shuffle (shuffle V, undef, splat-mask), undef, M
15608 // If splat-mask contains undef elements, we need to be careful about
15609 // introducing undef's in the folded mask which are not the result of composing
15610 // the masks of the shuffles.
15611 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
15612                                      ShuffleVectorSDNode *Splat,
15613                                      SelectionDAG &DAG) {
15614   ArrayRef<int> SplatMask = Splat->getMask();
15615   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
15616 
15617   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
15618   // every undef mask element in the splat-shuffle has a corresponding undef
15619   // element in the user-shuffle's mask or if the composition of mask elements
15620   // would result in undef.
15621   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
15622   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
15623   //   In this case it is not legal to simplify to the splat-shuffle because we
15624   //   may be exposing the users of the shuffle an undef element at index 1
15625   //   which was not there before the combine.
15626   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
15627   //   In this case the composition of masks yields SplatMask, so it's ok to
15628   //   simplify to the splat-shuffle.
15629   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
15630   //   In this case the composed mask includes all undef elements of SplatMask
15631   //   and in addition sets element zero to undef. It is safe to simplify to
15632   //   the splat-shuffle.
15633   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
15634                                        ArrayRef<int> SplatMask) {
15635     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
15636       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
15637           SplatMask[UserMask[i]] != -1)
15638         return false;
15639     return true;
15640   };
15641   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
15642     return SDValue(Splat, 0);
15643 
15644   // Create a new shuffle with a mask that is composed of the two shuffles'
15645   // masks.
15646   SmallVector<int, 32> NewMask;
15647   for (int Idx : UserMask)
15648     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
15649 
15650   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
15651                               Splat->getOperand(0), Splat->getOperand(1),
15652                               NewMask);
15653 }
15654 
// Main combine entry point for ISD::VECTOR_SHUFFLE nodes. Applies a series
// of canonicalizations and folds; returns the replacement value, or SDValue()
// when no combine applies.
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Both inputs are the same vector, so references to the second input
      // (mask values >= NumElts) can be folded down onto the first.
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  // Simplify shuffle mask if a referenced element is UNDEF.
  if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG))
    return V;

  // A shuffle of a single vector that is a splat can always be folded.
  if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
    if (N1->isUndef() && N0Shuf->isSplat())
      return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      // Find the first non-undef operand of the build_vector.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // There are various patterns used to build up a vector from smaller vectors,
  // subvectors, or elements. Scan chains of these and replace unused insertions
  // or components with undef.
  if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
    return S;

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    // Peek through the bitcast only if there is one user.
    SDValue BC0 = N0;
    while (BC0.getOpcode() == ISD::BITCAST) {
      if (!BC0.hasOneUse())
        break;
      BC0 = BC0.getOperand(0);
    }

    // Expand a mask by Scale, so that each entry covers the lanes of the
    // narrower scalar type (e.g. <0,1> with Scale 2 becomes <0,1,2,3>).
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      // Both scalar sizes must be whole multiples of the chosen (smaller)
      // scale type for the masks to be rescaled losslessly.
      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {

        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
  }

  return SDValue();
}
15998 
// Fold a SCALAR_TO_VECTOR whose scalar came straight out of another vector:
// SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V, C0)) can be expressed as a shuffle
// of V (moving element C0 into lane 0), possibly followed by a subvector
// extract or a scalar truncate to reach the requested type.
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    // Only a constant extraction index can be encoded in a shuffle mask.
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      // Mask that moves source element 'Elt' into lane 0; every other lane
      // is undef (-1).
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;
      SDValue Val;
      // If we have an implicit truncate (the extracted scalar is wider than
      // the result element), do the truncate here as long as the narrow
      // scalar type is legal; if it is not legal, this combine is skipped.
      if (VT.getScalarType() != InVal.getValueType() &&
          InVal.getValueType().isScalarInteger() &&
          isTypeLegal(VT.getScalarType())) {
        Val =
            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
      }
      // The shuffle form requires matching element types, a source vector at
      // least as wide as the result, and a target-legal mask.
      if (VT.getScalarType() == InVecT.getScalarType() &&
          VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
          TLI.isShuffleMaskLegal(NewMask, VT)) {
        Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
                                   DAG.getUNDEF(InVecT), NewMask);
        // If the initial vector is the correct size this shuffle is a
        // valid result.
        if (VT == InVecT)
          return Val;
        // If not we must truncate the vector.
        if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
          MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
          SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
          EVT SubVT =
              EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
                               VT.getVectorNumElements());
          // Keep the low VT.getVectorNumElements() lanes of the shuffle.
          Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
                            ZeroIdx);
          return Val;
        }
      }
    }
  }

  return SDValue();
}
16049 
// Simplify INSERT_SUBVECTOR(N0, N1, N2): insert subvector N1 into vector N0
// at (constant) element index N2. Tries a sequence of folds, from the
// cheapest (undef operands) to structural rewrites (bitcast hoisting,
// canonical ordering, concat rebuilding).
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If inserting an UNDEF, just return the original vector.
  if (N1.isUndef())
    return N0;

  // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
  // us to pull BITCASTs from input to output.
  if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
    if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);

  // If this is an insert of an extracted vector into an undef vector, we can
  // just use the input to the extract.
  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
    return N1.getOperand(0);

  // If we are inserting a bitcast value into an undef, with the same
  // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(0).getOperand(1) == N2 &&
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
          VT.getVectorNumElements()) {
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
  }

  // If both N1 and N2 are bitcast values on which insert_subvector
  // would makes sense, pull the bitcast through.
  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
    SDValue CN0 = N0.getOperand(0);
    SDValue CN1 = N1.getOperand(0);
    // Only safe when the pre-bitcast types agree on element type and the
    // outer vector keeps the same number of elements.
    if (CN0.getValueType().getVectorElementType() ==
            CN1.getValueType().getVectorElementType() &&
        CN0.getValueType().getVectorNumElements() ==
            VT.getVectorNumElements()) {
      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
                                      CN0.getValueType(), CN0, CN1, N2);
      return DAG.getBitcast(VT, NewINSERT);
    }
  }

  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N0.getOperand(1).getValueType() == N1.getValueType() &&
      N0.getOperand(2) == N2)
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
                       N1, N2);

  // The remaining folds need a constant insertion index.
  if (!isa<ConstantSDNode>(N2))
    return SDValue();

  unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();

  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, Idx0), Idx1)
  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
      N1.getValueType() == N0.getOperand(1).getValueType() &&
      isa<ConstantSDNode>(N0.getOperand(2))) {
    unsigned OtherIdx = N0.getConstantOperandVal(2);
    if (InsIdx < OtherIdx) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
    }
  }

  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType()) {
    unsigned Factor = N1.getValueType().getVectorNumElements();

    // Replace the concat operand that N1 fully overwrites.
    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
    Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;

    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  return SDValue();
}
16147 
16148 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
16149   SDValue N0 = N->getOperand(0);
16150 
16151   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
16152   if (N0->getOpcode() == ISD::FP16_TO_FP)
16153     return N0->getOperand(0);
16154 
16155   return SDValue();
16156 }
16157 
16158 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
16159   SDValue N0 = N->getOperand(0);
16160 
16161   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
16162   if (N0->getOpcode() == ISD::AND) {
16163     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
16164     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
16165       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
16166                          N0.getOperand(0));
16167     }
16168   }
16169 
16170   return SDValue();
16171 }
16172 
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  // Look through a bitcast so a constant mask built at a different element
  // width is still recognized.
  SDValue RHS = peekThroughBitcast(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (N->getOpcode() != ISD::AND)
    return SDValue();

  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  // 'Split' is how many sub-elements each original element is divided into;
  // returns the shuffle on success, a null SDValue otherwise.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      // Undef mask element: the shuffle lane is don't-care.
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // Sub-element order within an element depends on endianness.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits.lshrInPlace(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      // All-ones keeps the LHS lane; all-zeros selects the zero vector
      // (indices >= NumSubElts refer to the second shuffle operand).
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try progressively finer splits until one produces a legal clear mask.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
16264 
/// Visit a binary vector operation, like ADD.
/// Tries, in order: full constant folding, turning a constant AND mask into
/// a shuffle with zero, and hoisting a binop above two identical one-use
/// shuffles.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Ops[] = {LHS, RHS};

  // See if we can constant fold the vector operation.
  if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
          N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
    return Fold;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (SDValue Shuffle = XformToShuffleWithZero(N))
    return Shuffle;

  // Type legalization might introduce new shuffles in the DAG.
  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
  //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
  // Both shuffles must be single-use with an undef second operand so the
  // rewrite does not duplicate work or change other users.
  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
      isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
      LHS.getOperand(1).isUndef() &&
      RHS.getOperand(1).isUndef()) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);

    // The fold is only valid when both sides permute lanes identically.
    if (SVN0->getMask().equals(SVN1->getMask())) {
      EVT VT = N->getValueType(0);
      SDValue UndefVector = LHS.getOperand(1);
      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                     LHS.getOperand(0), RHS.getOperand(0),
                                     N->getFlags());
      AddUsersToWorklist(N);
      return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
                                  SVN0->getMask());
    }
  }

  return SDValue();
}
16307 
16308 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
16309                                     SDValue N2) {
16310   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
16311 
16312   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
16313                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
16314 
16315   // If we got a simplified select_cc node back from SimplifySelectCC, then
16316   // break it down into a new SETCC node, and a new SELECT node, and then return
16317   // the SELECT node, since we were called with a SELECT node.
16318   if (SCC.getNode()) {
16319     // Check to see if we got a select_cc back (to turn into setcc/select).
16320     // Otherwise, just return whatever node we got back, like fabs.
16321     if (SCC.getOpcode() == ISD::SELECT_CC) {
16322       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
16323                                   N0.getValueType(),
16324                                   SCC.getOperand(0), SCC.getOperand(1),
16325                                   SCC.getOperand(4));
16326       AddToWorklist(SETCC.getNode());
16327       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
16328                            SCC.getOperand(2), SCC.getOperand(3));
16329     }
16330 
16331     return SCC;
16332   }
16333   return SDValue();
16334 }
16335 
16336 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
16337 /// being selected between, see if we can simplify the select.  Callers of this
16338 /// should assume that TheSelect is deleted if this returns true.  As such, they
16339 /// should return the appropriate thing (e.g. the node) back to the top-level of
16340 /// the DAG combiner loop to avoid it being looked at.
16341 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
16342                                     SDValue RHS) {
16343 
16344   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
16345   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
16346   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
16347     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
16348       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
16349       SDValue Sqrt = RHS;
16350       ISD::CondCode CC;
16351       SDValue CmpLHS;
16352       const ConstantFPSDNode *Zero = nullptr;
16353 
16354       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
16355         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
16356         CmpLHS = TheSelect->getOperand(0);
16357         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
16358       } else {
16359         // SELECT or VSELECT
16360         SDValue Cmp = TheSelect->getOperand(0);
16361         if (Cmp.getOpcode() == ISD::SETCC) {
16362           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
16363           CmpLHS = Cmp.getOperand(0);
16364           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
16365         }
16366       }
16367       if (Zero && Zero->isZero() &&
16368           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
16369           CC == ISD::SETULT || CC == ISD::SETLT)) {
16370         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
16371         CombineTo(TheSelect, Sqrt);
16372         return true;
16373       }
16374     }
16375   }
16376   // Cannot simplify select with vector condition
16377   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
16378 
16379   // If this is a select from two identical things, try to pull the operation
16380   // through the select.
16381   if (LHS.getOpcode() != RHS.getOpcode() ||
16382       !LHS.hasOneUse() || !RHS.hasOneUse())
16383     return false;
16384 
16385   // If this is a load and the token chain is identical, replace the select
16386   // of two loads with a load through a select of the address to load from.
16387   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
16388   // constants have been dropped into the constant pool.
16389   if (LHS.getOpcode() == ISD::LOAD) {
16390     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
16391     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
16392 
16393     // Token chains must be identical.
16394     if (LHS.getOperand(0) != RHS.getOperand(0) ||
16395         // Do not let this transformation reduce the number of volatile loads.
16396         LLD->isVolatile() || RLD->isVolatile() ||
16397         // FIXME: If either is a pre/post inc/dec load,
16398         // we'd need to split out the address adjustment.
16399         LLD->isIndexed() || RLD->isIndexed() ||
16400         // If this is an EXTLOAD, the VT's must match.
16401         LLD->getMemoryVT() != RLD->getMemoryVT() ||
16402         // If this is an EXTLOAD, the kind of extension must match.
16403         (LLD->getExtensionType() != RLD->getExtensionType() &&
16404          // The only exception is if one of the extensions is anyext.
16405          LLD->getExtensionType() != ISD::EXTLOAD &&
16406          RLD->getExtensionType() != ISD::EXTLOAD) ||
16407         // FIXME: this discards src value information.  This is
16408         // over-conservative. It would be beneficial to be able to remember
16409         // both potential memory locations.  Since we are discarding
16410         // src value info, don't do the transformation if the memory
16411         // locations are not in the default address space.
16412         LLD->getPointerInfo().getAddrSpace() != 0 ||
16413         RLD->getPointerInfo().getAddrSpace() != 0 ||
16414         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
16415                                       LLD->getBasePtr().getValueType()))
16416       return false;
16417 
16418     // Check that the select condition doesn't reach either load.  If so,
16419     // folding this will induce a cycle into the DAG.  If not, this is safe to
16420     // xform, so create a select of the addresses.
16421     SDValue Addr;
16422     if (TheSelect->getOpcode() == ISD::SELECT) {
16423       SDNode *CondNode = TheSelect->getOperand(0).getNode();
16424       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
16425           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
16426         return false;
16427       // The loads must not depend on one another.
16428       if (LLD->isPredecessorOf(RLD) ||
16429           RLD->isPredecessorOf(LLD))
16430         return false;
16431       Addr = DAG.getSelect(SDLoc(TheSelect),
16432                            LLD->getBasePtr().getValueType(),
16433                            TheSelect->getOperand(0), LLD->getBasePtr(),
16434                            RLD->getBasePtr());
16435     } else {  // Otherwise SELECT_CC
16436       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
16437       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
16438 
16439       if ((LLD->hasAnyUseOfValue(1) &&
16440            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
16441           (RLD->hasAnyUseOfValue(1) &&
16442            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
16443         return false;
16444 
16445       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
16446                          LLD->getBasePtr().getValueType(),
16447                          TheSelect->getOperand(0),
16448                          TheSelect->getOperand(1),
16449                          LLD->getBasePtr(), RLD->getBasePtr(),
16450                          TheSelect->getOperand(4));
16451     }
16452 
16453     SDValue Load;
16454     // It is safe to replace the two loads if they have different alignments,
16455     // but the new load must be the minimum (most restrictive) alignment of the
16456     // inputs.
16457     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
16458     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
16459     if (!RLD->isInvariant())
16460       MMOFlags &= ~MachineMemOperand::MOInvariant;
16461     if (!RLD->isDereferenceable())
16462       MMOFlags &= ~MachineMemOperand::MODereferenceable;
16463     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
16464       // FIXME: Discards pointer and AA info.
16465       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
16466                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
16467                          MMOFlags);
16468     } else {
16469       // FIXME: Discards pointer and AA info.
16470       Load = DAG.getExtLoad(
16471           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
16472                                                   : LLD->getExtensionType(),
16473           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
16474           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
16475     }
16476 
16477     // Users of the select now use the result of the load.
16478     CombineTo(TheSelect, Load);
16479 
16480     // Users of the old loads now use the new load's chain.  We know the
16481     // old-load value is dead now.
16482     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
16483     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
16484     return true;
16485   }
16486 
16487   return false;
16488 }
16489 
16490 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
16491 /// bitwise 'and'.
16492 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
16493                                             SDValue N1, SDValue N2, SDValue N3,
16494                                             ISD::CondCode CC) {
16495   // If this is a select where the false operand is zero and the compare is a
16496   // check of the sign bit, see if we can perform the "gzip trick":
16497   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
16498   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
16499   EVT XType = N0.getValueType();
16500   EVT AType = N2.getValueType();
16501   if (!isNullConstant(N3) || !XType.bitsGE(AType))
16502     return SDValue();
16503 
16504   // If the comparison is testing for a positive value, we have to invert
16505   // the sign bit mask, so only do that transform if the target has a bitwise
16506   // 'and not' instruction (the invert is free).
16507   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
16508     // (X > -1) ? A : 0
16509     // (X >  0) ? X : 0 <-- This is canonical signed max.
16510     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
16511       return SDValue();
16512   } else if (CC == ISD::SETLT) {
16513     // (X <  0) ? A : 0
16514     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
16515     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
16516       return SDValue();
16517   } else {
16518     return SDValue();
16519   }
16520 
16521   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
16522   // constant.
16523   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
16524   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
16525   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
16526     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
16527     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
16528     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
16529     AddToWorklist(Shift.getNode());
16530 
16531     if (XType.bitsGT(AType)) {
16532       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16533       AddToWorklist(Shift.getNode());
16534     }
16535 
16536     if (CC == ISD::SETGT)
16537       Shift = DAG.getNOT(DL, Shift, AType);
16538 
16539     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16540   }
16541 
16542   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
16543   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
16544   AddToWorklist(Shift.getNode());
16545 
16546   if (XType.bitsGT(AType)) {
16547     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16548     AddToWorklist(Shift.getNode());
16549   }
16550 
16551   if (CC == ISD::SETGT)
16552     Shift = DAG.getNOT(DL, Shift, AType);
16553 
16554   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16555 }
16556 
16557 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
16558 /// where 'cond' is the comparison specified by CC.
16559 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
16560                                       SDValue N2, SDValue N3, ISD::CondCode CC,
16561                                       bool NotExtCompare) {
16562   // (x ? y : y) -> y.
16563   if (N2 == N3) return N2;
16564 
16565   EVT VT = N2.getValueType();
16566   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
16567   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
16568 
16569   // Determine if the condition we're dealing with is constant
16570   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
16571                               N0, N1, CC, DL, false);
16572   if (SCC.getNode()) AddToWorklist(SCC.getNode());
16573 
16574   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
16575     // fold select_cc true, x, y -> x
16576     // fold select_cc false, x, y -> y
16577     return !SCCC->isNullValue() ? N2 : N3;
16578   }
16579 
16580   // Check to see if we can simplify the select into an fabs node
16581   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
16582     // Allow either -0.0 or 0.0
16583     if (CFP->isZero()) {
16584       // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
16585       if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
16586           N0 == N2 && N3.getOpcode() == ISD::FNEG &&
16587           N2 == N3.getOperand(0))
16588         return DAG.getNode(ISD::FABS, DL, VT, N0);
16589 
16590       // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
16591       if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
16592           N0 == N3 && N2.getOpcode() == ISD::FNEG &&
16593           N2.getOperand(0) == N3)
16594         return DAG.getNode(ISD::FABS, DL, VT, N3);
16595     }
16596   }
16597 
16598   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
16599   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
16600   // in it.  This is a win when the constant is not otherwise available because
16601   // it replaces two constant pool loads with one.  We only do this if the FP
16602   // type is known to be legal, because if it isn't, then we are before legalize
16603   // types an we want the other legalization to happen first (e.g. to avoid
16604   // messing with soft float) and if the ConstantFP is not legal, because if
16605   // it is legal, we may not need to store the FP constant in a constant pool.
16606   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
16607     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
16608       if (TLI.isTypeLegal(N2.getValueType()) &&
16609           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
16610                TargetLowering::Legal &&
16611            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
16612            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
16613           // If both constants have multiple uses, then we won't need to do an
16614           // extra load, they are likely around in registers for other users.
16615           (TV->hasOneUse() || FV->hasOneUse())) {
16616         Constant *Elts[] = {
16617           const_cast<ConstantFP*>(FV->getConstantFPValue()),
16618           const_cast<ConstantFP*>(TV->getConstantFPValue())
16619         };
16620         Type *FPTy = Elts[0]->getType();
16621         const DataLayout &TD = DAG.getDataLayout();
16622 
16623         // Create a ConstantArray of the two constants.
16624         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
16625         SDValue CPIdx =
16626             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
16627                                 TD.getPrefTypeAlignment(FPTy));
16628         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
16629 
16630         // Get the offsets to the 0 and 1 element of the array so that we can
16631         // select between them.
16632         SDValue Zero = DAG.getIntPtrConstant(0, DL);
16633         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
16634         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
16635 
16636         SDValue Cond = DAG.getSetCC(DL,
16637                                     getSetCCResultType(N0.getValueType()),
16638                                     N0, N1, CC);
16639         AddToWorklist(Cond.getNode());
16640         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
16641                                           Cond, One, Zero);
16642         AddToWorklist(CstOffset.getNode());
16643         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
16644                             CstOffset);
16645         AddToWorklist(CPIdx.getNode());
16646         return DAG.getLoad(
16647             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
16648             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
16649             Alignment);
16650       }
16651     }
16652 
16653   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
16654     return V;
16655 
16656   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
16657   // where y is has a single bit set.
16658   // A plaintext description would be, we can turn the SELECT_CC into an AND
16659   // when the condition can be materialized as an all-ones register.  Any
16660   // single bit-test can be materialized as an all-ones register with
16661   // shift-left and shift-right-arith.
16662   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
16663       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
16664     SDValue AndLHS = N0->getOperand(0);
16665     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16666     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
16667       // Shift the tested bit over the sign bit.
16668       const APInt &AndMask = ConstAndRHS->getAPIntValue();
16669       SDValue ShlAmt =
16670         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
16671                         getShiftAmountTy(AndLHS.getValueType()));
16672       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
16673 
16674       // Now arithmetic right shift it all the way over, so the result is either
16675       // all-ones, or zero.
16676       SDValue ShrAmt =
16677         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
16678                         getShiftAmountTy(Shl.getValueType()));
16679       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
16680 
16681       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
16682     }
16683   }
16684 
16685   // fold select C, 16, 0 -> shl C, 4
16686   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
16687       TLI.getBooleanContents(N0.getValueType()) ==
16688           TargetLowering::ZeroOrOneBooleanContent) {
16689 
16690     // If the caller doesn't want us to simplify this into a zext of a compare,
16691     // don't do it.
16692     if (NotExtCompare && N2C->isOne())
16693       return SDValue();
16694 
16695     // Get a SetCC of the condition
16696     // NOTE: Don't create a SETCC if it's not legal on this target.
16697     if (!LegalOperations ||
16698         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
16699       SDValue Temp, SCC;
16700       // cast from setcc result type to select result type
16701       if (LegalTypes) {
16702         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
16703                             N0, N1, CC);
16704         if (N2.getValueType().bitsLT(SCC.getValueType()))
16705           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
16706                                         N2.getValueType());
16707         else
16708           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
16709                              N2.getValueType(), SCC);
16710       } else {
16711         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
16712         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
16713                            N2.getValueType(), SCC);
16714       }
16715 
16716       AddToWorklist(SCC.getNode());
16717       AddToWorklist(Temp.getNode());
16718 
16719       if (N2C->isOne())
16720         return Temp;
16721 
16722       // shl setcc result by log2 n2c
16723       return DAG.getNode(
16724           ISD::SHL, DL, N2.getValueType(), Temp,
16725           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
16726                           getShiftAmountTy(Temp.getValueType())));
16727     }
16728   }
16729 
16730   // Check to see if this is an integer abs.
16731   // select_cc setg[te] X,  0,  X, -X ->
16732   // select_cc setgt    X, -1,  X, -X ->
16733   // select_cc setl[te] X,  0, -X,  X ->
16734   // select_cc setlt    X,  1, -X,  X ->
16735   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
16736   if (N1C) {
16737     ConstantSDNode *SubC = nullptr;
16738     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
16739          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
16740         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
16741       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
16742     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
16743               (N1C->isOne() && CC == ISD::SETLT)) &&
16744              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
16745       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
16746 
16747     EVT XType = N0.getValueType();
16748     if (SubC && SubC->isNullValue() && XType.isInteger()) {
16749       SDLoc DL(N0);
16750       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
16751                                   N0,
16752                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
16753                                          getShiftAmountTy(N0.getValueType())));
16754       SDValue Add = DAG.getNode(ISD::ADD, DL,
16755                                 XType, N0, Shift);
16756       AddToWorklist(Shift.getNode());
16757       AddToWorklist(Add.getNode());
16758       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
16759     }
16760   }
16761 
16762   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
16763   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
16764   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
16765   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
16766   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
16767   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
16768   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
16769   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
16770   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
16771     SDValue ValueOnZero = N2;
16772     SDValue Count = N3;
16773     // If the condition is NE instead of E, swap the operands.
16774     if (CC == ISD::SETNE)
16775       std::swap(ValueOnZero, Count);
16776     // Check if the value on zero is a constant equal to the bits in the type.
16777     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
16778       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
16779         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
16780         // legal, combine to just cttz.
16781         if ((Count.getOpcode() == ISD::CTTZ ||
16782              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
16783             N0 == Count.getOperand(0) &&
16784             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
16785           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
16786         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
16787         // legal, combine to just ctlz.
16788         if ((Count.getOpcode() == ISD::CTLZ ||
16789              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
16790             N0 == Count.getOperand(0) &&
16791             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
16792           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
16793       }
16794     }
16795   }
16796 
16797   return SDValue();
16798 }
16799 
16800 /// This is a stub for TargetLowering::SimplifySetCC.
16801 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
16802                                    ISD::CondCode Cond, const SDLoc &DL,
16803                                    bool foldBooleans) {
16804   TargetLowering::DAGCombinerInfo
16805     DagCombineInfo(DAG, Level, false, this);
16806   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
16807 }
16808 
16809 /// Given an ISD::SDIV node expressing a divide by constant, return
16810 /// a DAG expression to select that will generate the same value by multiplying
16811 /// by a magic number.
16812 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
16813 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
16814   // when optimising for minimum size, we don't want to expand a div to a mul
16815   // and a shift.
16816   if (DAG.getMachineFunction().getFunction()->optForMinSize())
16817     return SDValue();
16818 
16819   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16820   if (!C)
16821     return SDValue();
16822 
16823   // Avoid division by zero.
16824   if (C->isNullValue())
16825     return SDValue();
16826 
16827   std::vector<SDNode*> Built;
16828   SDValue S =
16829       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
16830 
16831   for (SDNode *N : Built)
16832     AddToWorklist(N);
16833   return S;
16834 }
16835 
16836 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
16837 /// DAG expression that will generate the same value by right shifting.
16838 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
16839   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16840   if (!C)
16841     return SDValue();
16842 
16843   // Avoid division by zero.
16844   if (C->isNullValue())
16845     return SDValue();
16846 
16847   std::vector<SDNode *> Built;
16848   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
16849 
16850   for (SDNode *N : Built)
16851     AddToWorklist(N);
16852   return S;
16853 }
16854 
16855 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
16856 /// expression that will generate the same value by multiplying by a magic
16857 /// number.
16858 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
16859 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
16860   // when optimising for minimum size, we don't want to expand a div to a mul
16861   // and a shift.
16862   if (DAG.getMachineFunction().getFunction()->optForMinSize())
16863     return SDValue();
16864 
16865   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
16866   if (!C)
16867     return SDValue();
16868 
16869   // Avoid division by zero.
16870   if (C->isNullValue())
16871     return SDValue();
16872 
16873   std::vector<SDNode*> Built;
16874   SDValue S =
16875       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
16876 
16877   for (SDNode *N : Built)
16878     AddToWorklist(N);
16879   return S;
16880 }
16881 
16882 /// Determines the LogBase2 value for a non-null input value using the
16883 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
16884 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
16885   EVT VT = V.getValueType();
16886   unsigned EltBits = VT.getScalarSizeInBits();
16887   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
16888   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
16889   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
16890   return LogBase2;
16891 }
16892 
16893 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16894 /// For the reciprocal, we need to find the zero of the function:
16895 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
16896 ///     =>
16897 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
16898 ///     does not require additional intermediate precision]
16899 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
16900   if (Level >= AfterLegalizeDAG)
16901     return SDValue();
16902 
16903   // TODO: Handle half and/or extended types?
16904   EVT VT = Op.getValueType();
16905   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
16906     return SDValue();
16907 
16908   // If estimates are explicitly disabled for this function, we're done.
16909   MachineFunction &MF = DAG.getMachineFunction();
16910   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
16911   if (Enabled == TLI.ReciprocalEstimate::Disabled)
16912     return SDValue();
16913 
16914   // Estimates may be explicitly enabled for this type with a custom number of
16915   // refinement steps.
16916   int Iterations = TLI.getDivRefinementSteps(VT, MF);
16917   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
16918     AddToWorklist(Est.getNode());
16919 
16920     if (Iterations) {
16921       EVT VT = Op.getValueType();
16922       SDLoc DL(Op);
16923       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
16924 
16925       // Newton iterations: Est = Est + Est (1 - Arg * Est)
16926       for (int i = 0; i < Iterations; ++i) {
16927         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
16928         AddToWorklist(NewEst.getNode());
16929 
16930         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
16931         AddToWorklist(NewEst.getNode());
16932 
16933         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
16934         AddToWorklist(NewEst.getNode());
16935 
16936         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
16937         AddToWorklist(Est.getNode());
16938       }
16939     }
16940     return Est;
16941   }
16942 
16943   return SDValue();
16944 }
16945 
16946 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16947 /// For the reciprocal sqrt, we need to find the zero of the function:
16948 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
16949 ///     =>
16950 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
16951 /// As a result, we precompute A/2 prior to the iteration loop.
16952 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
16953                                          unsigned Iterations,
16954                                          SDNodeFlags Flags, bool Reciprocal) {
16955   EVT VT = Arg.getValueType();
16956   SDLoc DL(Arg);
16957   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
16958 
16959   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
16960   // this entire sequence requires only one FP constant.
16961   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
16962   AddToWorklist(HalfArg.getNode());
16963 
16964   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
16965   AddToWorklist(HalfArg.getNode());
16966 
16967   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
16968   for (unsigned i = 0; i < Iterations; ++i) {
16969     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
16970     AddToWorklist(NewEst.getNode());
16971 
16972     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
16973     AddToWorklist(NewEst.getNode());
16974 
16975     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
16976     AddToWorklist(NewEst.getNode());
16977 
16978     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
16979     AddToWorklist(Est.getNode());
16980   }
16981 
16982   // If non-reciprocal square root is requested, multiply the result by Arg.
16983   if (!Reciprocal) {
16984     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
16985     AddToWorklist(Est.getNode());
16986   }
16987 
16988   return Est;
16989 }
16990 
16991 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
16992 /// For the reciprocal sqrt, we need to find the zero of the function:
16993 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
16994 ///     =>
16995 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
16996 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
16997                                          unsigned Iterations,
16998                                          SDNodeFlags Flags, bool Reciprocal) {
16999   EVT VT = Arg.getValueType();
17000   SDLoc DL(Arg);
17001   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
17002   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
17003 
17004   // This routine must enter the loop below to work correctly
17005   // when (Reciprocal == false).
17006   assert(Iterations > 0);
17007 
17008   // Newton iterations for reciprocal square root:
17009   // E = (E * -0.5) * ((A * E) * E + -3.0)
17010   for (unsigned i = 0; i < Iterations; ++i) {
17011     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
17012     AddToWorklist(AE.getNode());
17013 
17014     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
17015     AddToWorklist(AEE.getNode());
17016 
17017     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
17018     AddToWorklist(RHS.getNode());
17019 
17020     // When calculating a square root at the last iteration build:
17021     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
17022     // (notice a common subexpression)
17023     SDValue LHS;
17024     if (Reciprocal || (i + 1) < Iterations) {
17025       // RSQRT: LHS = (E * -0.5)
17026       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
17027     } else {
17028       // SQRT: LHS = (A * E) * -0.5
17029       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
17030     }
17031     AddToWorklist(LHS.getNode());
17032 
17033     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
17034     AddToWorklist(Est.getNode());
17035   }
17036 
17037   return Est;
17038 }
17039 
17040 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
17041 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
17042 /// Op can be zero.
17043 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
17044                                            bool Reciprocal) {
17045   if (Level >= AfterLegalizeDAG)
17046     return SDValue();
17047 
17048   // TODO: Handle half and/or extended types?
17049   EVT VT = Op.getValueType();
17050   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
17051     return SDValue();
17052 
17053   // If estimates are explicitly disabled for this function, we're done.
17054   MachineFunction &MF = DAG.getMachineFunction();
17055   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
17056   if (Enabled == TLI.ReciprocalEstimate::Disabled)
17057     return SDValue();
17058 
17059   // Estimates may be explicitly enabled for this type with a custom number of
17060   // refinement steps.
17061   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
17062 
17063   bool UseOneConstNR = false;
17064   if (SDValue Est =
17065       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
17066                           Reciprocal)) {
17067     AddToWorklist(Est.getNode());
17068 
17069     if (Iterations) {
17070       Est = UseOneConstNR
17071             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
17072             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
17073 
17074       if (!Reciprocal) {
17075         // Unfortunately, Est is now NaN if the input was exactly 0.0.
17076         // Select out this case and force the answer to 0.0.
17077         EVT VT = Op.getValueType();
17078         SDLoc DL(Op);
17079 
17080         SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
17081         EVT CCVT = getSetCCResultType(VT);
17082         SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
17083         AddToWorklist(ZeroCmp.getNode());
17084 
17085         Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
17086                           ZeroCmp, FPZero, Est);
17087         AddToWorklist(Est.getNode());
17088       }
17089     }
17090     return Est;
17091   }
17092 
17093   return SDValue();
17094 }
17095 
17096 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
17097   return buildSqrtEstimateImpl(Op, Flags, true);
17098 }
17099 
17100 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
17101   return buildSqrtEstimateImpl(Op, Flags, false);
17102 }
17103 
17104 /// Return true if base is a frame index, which is known not to alias with
17105 /// anything but itself.  Provides base object and offset as results.
17106 static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
17107                            const GlobalValue *&GV, const void *&CV) {
17108   // Assume it is a primitive operation.
17109   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
17110 
17111   // If it's an adding a simple constant then integrate the offset.
17112   if (Base.getOpcode() == ISD::ADD) {
17113     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
17114       Base = Base.getOperand(0);
17115       Offset += C->getSExtValue();
17116     }
17117   }
17118 
17119   // Return the underlying GlobalValue, and update the Offset.  Return false
17120   // for GlobalAddressSDNode since the same GlobalAddress may be represented
17121   // by multiple nodes with different offsets.
17122   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
17123     GV = G->getGlobal();
17124     Offset += G->getOffset();
17125     return false;
17126   }
17127 
17128   // Return the underlying Constant value, and update the Offset.  Return false
17129   // for ConstantSDNodes since the same constant pool entry may be represented
17130   // by multiple nodes with different offsets.
17131   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
17132     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
17133                                          : (const void *)C->getConstVal();
17134     Offset += C->getOffset();
17135     return false;
17136   }
17137   // If it's any of the following then it can't alias with anything but itself.
17138   return isa<FrameIndexSDNode>(Base);
17139 }
17140 
/// Return true if there is any possibility that the two addresses overlap.
/// Conservative: returns true unless one of the ordered checks below can
/// prove the accesses are disjoint.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
  // If they are the same then they must be aliases.
  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;

  // If they are both volatile then they cannot be reordered.
  if (Op0->isVolatile() && Op1->isVolatile()) return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load /store.
  if (Op0->isInvariant() && Op1->writeMem())
    return false;

  if (Op1->isInvariant() && Op0->writeMem())
    return false;

  // Access sizes in bytes.
  unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
  unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;

  // Check for BaseIndexOffset matching.
  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
  int64_t PtrDiff;
  // Same base+index: alias iff the byte ranges [0, NumBytes0) and
  // [PtrDiff, PtrDiff + NumBytes1) intersect.
  if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
    return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));

  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
  // able to calculate their relative offset if at least one arises
  // from an alloca. However, these allocas cannot overlap and we
  // can infer there is no alias.
  if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
    if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
      // If the base are the same frame index but the we couldn't find a
      // constant offset, (indices are different) be conservative.
      if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
                     !MFI.isFixedObjectIndex(B->getIndex())))
        return false;
    }

  // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
  // modified to use BaseIndexOffset.

  // Gather base node and offset information.
  SDValue Base0, Base1;
  int64_t Offset0, Offset1;
  const GlobalValue *GV0, *GV1;
  const void *CV0, *CV1;
  bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(),
                                      Base0, Offset0, GV0, CV0);
  bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(),
                                      Base1, Offset1, GV1, CV1);

  // If they have the same base address, then check to see if they overlap.
  // Alias iff the two [Offset, Offset + NumBytes) intervals intersect.
  if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1)))
    return !((Offset0 + NumBytes0) <= Offset1 ||
             (Offset1 + NumBytes1) <= Offset0);

  // It is possible for different frame indices to alias each other, mostly
  // when tail call optimization reuses return address slots for arguments.
  // To catch this case, look up the actual index of frame indices to compute
  // the real alias relationship.
  if (IsFrameIndex0 && IsFrameIndex1) {
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    Offset0 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base0)->getIndex());
    Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
    return !((Offset0 + NumBytes0) <= Offset1 ||
             (Offset1 + NumBytes1) <= Offset0);
  }

  // Otherwise, if we know what the bases are, and they aren't identical, then
  // we know they cannot alias.
  if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1))
    return false;

  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
  // compared to the size and offset of the access, we may be able to prove they
  // do not alias. This check is conservative for now to catch cases created by
  // splitting vector types.
  int64_t SrcValOffset0 = Op0->getSrcValueOffset();
  int64_t SrcValOffset1 = Op1->getSrcValueOffset();
  unsigned OrigAlignment0 = Op0->getOriginalAlignment();
  unsigned OrigAlignment1 = Op1->getOriginalAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
    // Compare positions within an alignment-sized window; equal alignment and
    // sizes mean disjoint windows cannot overlap.
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of similar
    // size. Return no alias.
    if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
        (OffAlign1 + NumBytes1) <= OffAlign0)
      return false;
  }

  // The command-line flag overrides the subtarget's default when present.
  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA &&
      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
    // Use alias analysis information.
    // Extend both ranges from the smaller offset so a single query covers
    // both accesses.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
    AliasResult AAResult =
        AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
17264 
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  // Volatile loads are treated like stores for aliasing purposes.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    // Give up entirely when the walk gets too deep: fall back to the
    // original chain, which is always a conservatively correct answer.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here.
      // Two non-volatile loads never alias for ordering purposes, so only
      // run the full isAlias check otherwise.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    case ISD::CopyFromReg:
      // Forward past CopyFromReg.
      Chains.push_back(Chain.getOperand(0));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      // Unknown chain producers are conservatively treated as aliases.
      Aliases.push_back(Chain);
      break;
    }
  }
}
17351 
17352 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
17353 /// (aliasing node.)
17354 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
17355   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
17356 
17357   // Accumulate all the aliases to this node.
17358   GatherAllAliases(N, OldChain, Aliases);
17359 
17360   // If no operands then chain to entry token.
17361   if (Aliases.size() == 0)
17362     return DAG.getEntryNode();
17363 
17364   // If a single operand then chain to it.  We don't need to revisit it.
17365   if (Aliases.size() == 1)
17366     return Aliases[0];
17367 
17368   // Construct a custom tailored token factor.
17369   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
17370 }
17371 
17372 // This function tries to collect a bunch of potentially interesting
17373 // nodes to improve the chains of, all at once. This might seem
17374 // redundant, as this function gets called when visiting every store
17375 // node, so why not let the work be done on each store as it's visited?
17376 //
17377 // I believe this is mainly important because MergeConsecutiveStores
17378 // is unable to deal with merging stores of different sizes, so unless
17379 // we improve the chains of all the potential candidates up-front
17380 // before running MergeConsecutiveStores, it might only see some of
17381 // the nodes that will eventually be candidates, and then not be able
17382 // to go from a partially-merged state to the desired final
17383 // fully-merged state.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Candidate stores sharing St's base pointer, starting with St itself.
  SmallVector<StoreSDNode *, 8> ChainedStores;
  ChainedStores.push_back(St);

  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction with a different kind
  // or instruction which has a different base pointer.
  StoreSDNode *Index = St;
  while (Index) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (Index != St && !SDValue(Index, 0)->hasOneUse())
      break;

    // Volatile or pre/post-indexed stores can't be safely rechained.
    if (Index->isVolatile() || Index->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);

    // Check that the base pointer is the same as the original one.
    if (!BasePtr.equalBaseIndex(Ptr, DAG))
      break;

    // Walk up the chain to find the next store node, ignoring any
    // intermediate loads. Any other kind of node will halt the loop.
    SDNode *NextInChain = Index->getChain().getNode();
    while (true) {
      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
        // We found a store node. Use it for the next iteration.
        if (STn->isVolatile() || STn->isIndexed()) {
          // A volatile/indexed store terminates the walk entirely.
          Index = nullptr;
          break;
        }
        ChainedStores.push_back(STn);
        Index = STn;
        break;
      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
        // Skip over loads; keep walking up their chain operand.
        NextInChain = Ldn->getChain().getNode();
        continue;
      } else {
        // Any other node kind ends the search.
        Index = nullptr;
        break;
      }
    } // end while
  }

  // At this point, ChainedStores lists all of the Store nodes
  // reachable by iterating up through chain nodes matching the above
  // conditions.  For each such store identified, try to find an
  // earlier chain to attach the store to which won't violate the
  // required ordering.
  //
  // MadeChangeToSt reports (as the return value) whether St itself — as
  // opposed to one of the other chained stores — received a better chain.
  bool MadeChangeToSt = false;
  SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;

  for (StoreSDNode *ChainedStore : ChainedStores) {
    SDValue Chain = ChainedStore->getChain();
    SDValue BetterChain = FindBetterChain(ChainedStore, Chain);

    if (Chain != BetterChain) {
      if (ChainedStore == St)
        MadeChangeToSt = true;
      BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
    }
  }

  // Do all replacements after finding the replacements to make to avoid making
  // the chains more complicated by introducing new TokenFactors.
  for (auto Replacement : BetterChains)
    replaceStoreChain(Replacement.first, Replacement.second);

  return MadeChangeToSt;
}
17468 
17469 /// This is the entry point for the file.
17470 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
17471                            CodeGenOpt::Level OptLevel) {
17472   /// This is the main entry point to this class.
17473   DAGCombiner(*this, AA, OptLevel).Run(Level);
17474 }
17475