1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/SmallBitVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/Target/TargetLowering.h"
38 #include "llvm/Target/TargetOptions.h"
39 #include "llvm/Target/TargetRegisterInfo.h"
40 #include "llvm/Target/TargetSubtargetInfo.h"
41 #include <algorithm>
42 using namespace llvm;
43 
#define DEBUG_TYPE "dagcombine"

// Counters declared through the STATISTIC macro. Each entry pairs a counter
// variable with the description string reported for it.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
52 
53 namespace {
  /// Hidden flag "combiner-alias-analysis": enables the combiner's
  /// alias-analysis heuristics. No explicit initial value is given.
  static cl::opt<bool>
    CombinerAA("combiner-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner alias-analysis heuristics"));

  /// Hidden flag "combiner-global-alias-analysis": lets the combiner consult
  /// IR-level alias analysis. No explicit initial value is given.
  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner's use of IR alias analysis"));

  /// Hidden flag "combiner-use-tbaa": lets the combiner use TBAA.
  /// Initialized to true.
  static cl::opt<bool>
    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
               cl::desc("Enable DAG combiner's use of TBAA"));

  // Only available when NDEBUG is not defined: names the single function in
  // which DAG-combiner alias analysis should be used.
#ifndef NDEBUG
  static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
               cl::desc("Only use DAG-combiner alias analysis in this"
                        " function"));
#endif

  /// Hidden option to stress test load slicing, i.e., when this option
  /// is enabled, load slicing bypasses most of its profitability guards.
  /// Initialized to false.
  static cl::opt<bool>
  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                    cl::desc("Bypass the profitability model of load "
                             "slicing"),
                    cl::init(false));

  /// Hidden flag "combiner-split-load-index": allows the combiner to split
  /// indexing from loads. Initialized to true.
  static cl::opt<bool>
    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                      cl::desc("DAG combiner may split indexing from loads"));
84 
85 //------------------------------ DAGCombiner ---------------------------------//
86 
  /// Worklist-driven combiner over a SelectionDAG. The class holds references
  /// to the DAG and its target lowering info, the worklist bookkeeping, and
  /// the declarations of the per-opcode visit routines. Only the small
  /// helpers below are defined inline; the rest are defined out of line.
  class DAGCombiner {
    // The DAG being combined, and the target lowering info obtained from it
    // in the constructor.
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    // Current combine level; the constructor starts it at BeforeLegalizeTypes.
    CombineLevel Level;
    // Optimization level handed to the constructor.
    CodeGenOpt::Level OptLevel;
    // Both start out false in the constructor. LegalTypes gates isTypeLegal()
    // and selects the scalar shift-amount type in getShiftAmountTy().
    bool LegalOperations;
    bool LegalTypes;
    // Result of optForSize() on the current function, read in the constructor.
    bool ForCodeSize;

    /// \brief Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// \brief Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// \brief Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis &AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// Push N onto the back of the worklist (next to be processed) unless it
    /// is already queued. A node that is already on the worklist keeps its
    /// current position; it is not moved to the back.
    void AddToWorklist(SDNode *N) {
      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      // The map insertion succeeds only for nodes not yet queued; the recorded
      // index is the slot the push_back below fills.
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove N from the worklist (if present) and from the set of
    /// already-combined nodes.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    /// \p To points at \p NumTo replacement values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    /// Single-result convenience form; forwards to the array overload.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    /// Two-result convenience form; forwards to the array overload.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    /// This overload demands every bit of Op's scalar value size.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
                         SDValue ExtLoad, const SDLoc &DL,
                         ISD::NodeType ExtType);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCE(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMACombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
                           SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    SDValue foldSelectOfConstants(SDNode *N);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans = true);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags *Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags *Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef<int> VectorMask,
                                  SDValue VecIn1, SDValue VecIn2,
                                  unsigned LeftIdx);

    SDValue GetDemandedBits(SDValue V, const APInt &Mask);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    /// Match "(X shl/srl V1) & V2" where V2 may not be present.
    bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Stores the three values as given; no validation is performed.
      MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
      MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;
      // Offset from the base ptr.
      int64_t OffsetFromBase;
      // What is the sequence number of this mem node.
      // Lowest mem operand in the DAG starts at zero.
      unsigned SequenceNum;
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
    /// constant build_vector of the stored constant values in Stores.
    SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL,
                                         ArrayRef<MemOpLink> Stores,
                                         SmallVectorImpl<SDValue> &Chains,
                                         EVT Ty) const;

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.  LoadedVT returns the type
    /// of the original loaded value.  NarrowLoad returns whether the load would
    /// need to be narrowed in order to match.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
                          bool &NarrowLoad);

    /// This is a helper function for MergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store.
    /// \return True if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector);

    /// This is a helper function for MergeConsecutiveStores.
    /// Stores that may be merged are placed in StoreNodes.
    /// Loads that may alias with those stores are placed in AliasLoadNodes.
    void getStoreMergeAndAliasCandidates(
        StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
        SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// Candidate stores have indirect dependency through their
    /// operands. \return True if safe to merge
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return True if some memory operations were changed.
    bool MergeConsecutiveStores(StoreSDNode *N);

    /// \brief Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

  public:
    /// Bind the combiner to DAG \p D, alias analysis \p A and optimization
    /// level \p OL. The combiner starts at BeforeLegalizeTypes with both
    /// legality flags cleared.
    DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
      // ForCodeSize is assigned here rather than in the initializer list.
      ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
    }

    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    /// Accessor for the DAG this combiner operates on.
    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    /// Vector types are returned unchanged. For scalars the target's scalar
    /// shift-amount type is used once LegalTypes is set, and the pointer type
    /// otherwise.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      if (LHSTy.isVector())
        return LHSTy;
      auto &DL = DAG.getDataLayout();
      return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
                        : TLI.getPointerTy(DL);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    /// that supplies the DAG's data layout and context.
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }
  };
519 }
520 
521 
522 namespace {
523 /// This class is a DAGUpdateListener that removes any deleted
524 /// nodes from the worklist.
525 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
526   DAGCombiner &DC;
527 public:
528   explicit WorklistRemover(DAGCombiner &dc)
529     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
530 
531   void NodeDeleted(SDNode *N, SDNode *E) override {
532     DC.removeFromWorklist(N);
533   }
534 };
535 }
536 
537 //===----------------------------------------------------------------------===//
538 //  TargetLowering::DAGCombinerInfo implementation
539 //===----------------------------------------------------------------------===//
540 
541 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
542   ((DAGCombiner*)DC)->AddToWorklist(N);
543 }
544 
545 SDValue TargetLowering::DAGCombinerInfo::
546 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
547   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
548 }
549 
550 SDValue TargetLowering::DAGCombinerInfo::
551 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
552   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
553 }
554 
555 
556 SDValue TargetLowering::DAGCombinerInfo::
557 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
558   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
559 }
560 
561 void TargetLowering::DAGCombinerInfo::
562 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
563   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
564 }
565 
566 //===----------------------------------------------------------------------===//
567 // Helper Functions
568 //===----------------------------------------------------------------------===//
569 
570 void DAGCombiner::deleteAndRecombine(SDNode *N) {
571   removeFromWorklist(N);
572 
573   // If the operands of this node are only used by the node, they will now be
574   // dead. Make sure to re-visit them and recursively delete dead nodes.
575   for (const SDValue &Op : N->ops())
576     // For an operand generating multiple values, one of the values may
577     // become dead allowing further simplification (e.g. split index
578     // arithmetic from an indexed load).
579     if (Op->hasOneUse() || Op->getNumValues() > 1)
580       AddToWorklist(Op.getNode());
581 
582   DAG.DeleteNode(N);
583 }
584 
585 /// Return 1 if we can compute the negated form of the specified expression for
586 /// the same cost as the expression itself, or 2 if we can compute the negated
587 /// form more cheaply than the expression itself.
588 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
589                                const TargetLowering &TLI,
590                                const TargetOptions *Options,
591                                unsigned Depth = 0) {
592   // fneg is removable even if it has multiple uses.
593   if (Op.getOpcode() == ISD::FNEG) return 2;
594 
595   // Don't allow anything with multiple uses.
596   if (!Op.hasOneUse()) return 0;
597 
598   // Don't recurse exponentially.
599   if (Depth > 6) return 0;
600 
601   switch (Op.getOpcode()) {
602   default: return false;
603   case ISD::ConstantFP:
604     // Don't invert constant FP values after legalize.  The negated constant
605     // isn't necessarily legal.
606     return LegalOperations ? 0 : 1;
607   case ISD::FADD:
608     // FIXME: determine better conditions for this xform.
609     if (!Options->UnsafeFPMath) return 0;
610 
611     // After operation legalization, it might not be legal to create new FSUBs.
612     if (LegalOperations &&
613         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
614       return 0;
615 
616     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
617     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
618                                     Options, Depth + 1))
619       return V;
620     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
621     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
622                               Depth + 1);
623   case ISD::FSUB:
624     // We can't turn -(A-B) into B-A when we honor signed zeros.
625     if (!Options->UnsafeFPMath) return 0;
626 
627     // fold (fneg (fsub A, B)) -> (fsub B, A)
628     return 1;
629 
630   case ISD::FMUL:
631   case ISD::FDIV:
632     if (Options->HonorSignDependentRoundingFPMath()) return 0;
633 
634     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
635     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
636                                     Options, Depth + 1))
637       return V;
638 
639     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
640                               Depth + 1);
641 
642   case ISD::FP_EXTEND:
643   case ISD::FP_ROUND:
644   case ISD::FSIN:
645     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
646                               Depth + 1);
647   }
648 }
649 
650 /// If isNegatibleForFree returns true, return the newly negated expression.
651 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
652                                     bool LegalOperations, unsigned Depth = 0) {
653   const TargetOptions &Options = DAG.getTarget().Options;
654   // fneg is removable even if it has multiple uses.
655   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
656 
657   // Don't allow anything with multiple uses.
658   assert(Op.hasOneUse() && "Unknown reuse!");
659 
660   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
661 
662   const SDNodeFlags *Flags = Op.getNode()->getFlags();
663 
664   switch (Op.getOpcode()) {
665   default: llvm_unreachable("Unknown code");
666   case ISD::ConstantFP: {
667     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
668     V.changeSign();
669     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
670   }
671   case ISD::FADD:
672     // FIXME: determine better conditions for this xform.
673     assert(Options.UnsafeFPMath);
674 
675     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
676     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
677                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
678       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
679                          GetNegatedExpression(Op.getOperand(0), DAG,
680                                               LegalOperations, Depth+1),
681                          Op.getOperand(1), Flags);
682     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
683     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
684                        GetNegatedExpression(Op.getOperand(1), DAG,
685                                             LegalOperations, Depth+1),
686                        Op.getOperand(0), Flags);
687   case ISD::FSUB:
688     // We can't turn -(A-B) into B-A when we honor signed zeros.
689     assert(Options.UnsafeFPMath);
690 
691     // fold (fneg (fsub 0, B)) -> B
692     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
693       if (N0CFP->isZero())
694         return Op.getOperand(1);
695 
696     // fold (fneg (fsub A, B)) -> (fsub B, A)
697     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
698                        Op.getOperand(1), Op.getOperand(0), Flags);
699 
700   case ISD::FMUL:
701   case ISD::FDIV:
702     assert(!Options.HonorSignDependentRoundingFPMath());
703 
704     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
705     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
706                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
707       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
708                          GetNegatedExpression(Op.getOperand(0), DAG,
709                                               LegalOperations, Depth+1),
710                          Op.getOperand(1), Flags);
711 
712     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
713     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
714                        Op.getOperand(0),
715                        GetNegatedExpression(Op.getOperand(1), DAG,
716                                             LegalOperations, Depth+1), Flags);
717 
718   case ISD::FP_EXTEND:
719   case ISD::FSIN:
720     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
721                        GetNegatedExpression(Op.getOperand(0), DAG,
722                                             LegalOperations, Depth+1));
723   case ISD::FP_ROUND:
724       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
725                          GetNegatedExpression(Op.getOperand(0), DAG,
726                                               LegalOperations, Depth+1),
727                          Op.getOperand(1));
728   }
729 }
730 
731 // APInts must be the same size for most operations, this helper
732 // function zero extends the shorter of the pair so that they match.
733 // We provide an Offset so that we can create bitwidths that won't overflow.
734 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
735   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
736   LHS = LHS.zextOrSelf(Bits);
737   RHS = RHS.zextOrSelf(Bits);
738 }
739 
740 // Return true if this node is a setcc, or is a select_cc
741 // that selects between the target values used for true and false, making it
742 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
743 // the appropriate nodes based on the type of node we are checking. This
744 // simplifies life a bit for the callers.
745 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
746                                     SDValue &CC) const {
747   if (N.getOpcode() == ISD::SETCC) {
748     LHS = N.getOperand(0);
749     RHS = N.getOperand(1);
750     CC  = N.getOperand(2);
751     return true;
752   }
753 
754   if (N.getOpcode() != ISD::SELECT_CC ||
755       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
756       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
757     return false;
758 
759   if (TLI.getBooleanContents(N.getValueType()) ==
760       TargetLowering::UndefinedBooleanContent)
761     return false;
762 
763   LHS = N.getOperand(0);
764   RHS = N.getOperand(1);
765   CC  = N.getOperand(4);
766   return true;
767 }
768 
769 /// Return true if this is a SetCC-equivalent operation with only one use.
770 /// If this is true, it allows the users to invert the operation for free when
771 /// it is profitable to do so.
772 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
773   SDValue N0, N1, N2;
774   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
775     return true;
776   return false;
777 }
778 
779 // \brief Returns the SDNode if it is a constant float BuildVector
780 // or constant float.
781 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
782   if (isa<ConstantFPSDNode>(N))
783     return N.getNode();
784   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
785     return N.getNode();
786   return nullptr;
787 }
788 
789 // Determines if it is a constant integer or a build vector of constant
790 // integers (and undefs).
791 // Do not permit build vector implicit truncation.
792 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
793   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
794     return !(Const->isOpaque() && NoOpaques);
795   if (N.getOpcode() != ISD::BUILD_VECTOR)
796     return false;
797   unsigned BitWidth = N.getScalarValueSizeInBits();
798   for (const SDValue &Op : N->op_values()) {
799     if (Op.isUndef())
800       continue;
801     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
802     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
803         (Const->isOpaque() && NoOpaques))
804       return false;
805   }
806   return true;
807 }
808 
809 // Determines if it is a constant null integer or a splatted vector of a
810 // constant null integer (with no undefs).
811 // Build vector implicit truncation is not an issue for null values.
812 static bool isNullConstantOrNullSplatConstant(SDValue N) {
813   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
814     return Splat->isNullValue();
815   return false;
816 }
817 
818 // Determines if it is a constant integer of one or a splatted vector of a
819 // constant integer of one (with no undefs).
820 // Do not permit build vector implicit truncation.
821 static bool isOneConstantOrOneSplatConstant(SDValue N) {
822   unsigned BitWidth = N.getScalarValueSizeInBits();
823   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
824     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
825   return false;
826 }
827 
828 // Determines if it is a constant integer of all ones or a splatted vector of a
829 // constant integer of all ones (with no undefs).
830 // Do not permit build vector implicit truncation.
831 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
832   unsigned BitWidth = N.getScalarValueSizeInBits();
833   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
834     return Splat->isAllOnesValue() &&
835            Splat->getAPIntValue().getBitWidth() == BitWidth;
836   return false;
837 }
838 
839 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
840                                     SDValue N1) {
841   EVT VT = N0.getValueType();
842   if (N0.getOpcode() == Opc) {
843     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
844       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
845         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
846         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
847           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
848         return SDValue();
849       }
850       if (N0.hasOneUse()) {
851         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
852         // use
853         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
854         if (!OpNode.getNode())
855           return SDValue();
856         AddToWorklist(OpNode.getNode());
857         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
858       }
859     }
860   }
861 
862   if (N1.getOpcode() == Opc) {
863     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
864       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
865         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
866         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
867           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
868         return SDValue();
869       }
870       if (N1.hasOneUse()) {
871         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
872         // use
873         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
874         if (!OpNode.getNode())
875           return SDValue();
876         AddToWorklist(OpNode.getNode());
877         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
878       }
879     }
880   }
881 
882   return SDValue();
883 }
884 
885 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
886                                bool AddTo) {
887   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
888   ++NodesCombined;
889   DEBUG(dbgs() << "\nReplacing.1 ";
890         N->dump(&DAG);
891         dbgs() << "\nWith: ";
892         To[0].getNode()->dump(&DAG);
893         dbgs() << " and " << NumTo-1 << " other values\n");
894   for (unsigned i = 0, e = NumTo; i != e; ++i)
895     assert((!To[i].getNode() ||
896             N->getValueType(i) == To[i].getValueType()) &&
897            "Cannot combine value to value of different type!");
898 
899   WorklistRemover DeadNodes(*this);
900   DAG.ReplaceAllUsesWith(N, To);
901   if (AddTo) {
902     // Push the new nodes and any users onto the worklist
903     for (unsigned i = 0, e = NumTo; i != e; ++i) {
904       if (To[i].getNode()) {
905         AddToWorklist(To[i].getNode());
906         AddUsersToWorklist(To[i].getNode());
907       }
908     }
909   }
910 
911   // Finally, if the node is now dead, remove it from the graph.  The node
912   // may not be dead if the replacement process recursively simplified to
913   // something else needing this node.
914   if (N->use_empty())
915     deleteAndRecombine(N);
916   return SDValue(N, 0);
917 }
918 
919 void DAGCombiner::
920 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
921   // Replace all uses.  If any nodes become isomorphic to other nodes and
922   // are deleted, make sure to remove them from our worklist.
923   WorklistRemover DeadNodes(*this);
924   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
925 
926   // Push the new node and any (possibly new) users onto the worklist.
927   AddToWorklist(TLO.New.getNode());
928   AddUsersToWorklist(TLO.New.getNode());
929 
930   // Finally, if the node is now dead, remove it from the graph.  The node
931   // may not be dead if the replacement process recursively simplified to
932   // something else needing this node.
933   if (TLO.Old.getNode()->use_empty())
934     deleteAndRecombine(TLO.Old.getNode());
935 }
936 
937 /// Check the specified integer node value to see if it can be simplified or if
938 /// things it uses can be simplified by bit propagation. If so, return true.
939 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
940   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
941   APInt KnownZero, KnownOne;
942   if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
943     return false;
944 
945   // Revisit the node.
946   AddToWorklist(Op.getNode());
947 
948   // Replace the old value with the new one.
949   ++NodesCombined;
950   DEBUG(dbgs() << "\nReplacing.2 ";
951         TLO.Old.getNode()->dump(&DAG);
952         dbgs() << "\nWith: ";
953         TLO.New.getNode()->dump(&DAG);
954         dbgs() << '\n');
955 
956   CommitTargetLoweringOpt(TLO);
957   return true;
958 }
959 
960 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
961   SDLoc DL(Load);
962   EVT VT = Load->getValueType(0);
963   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
964 
965   DEBUG(dbgs() << "\nReplacing.9 ";
966         Load->dump(&DAG);
967         dbgs() << "\nWith: ";
968         Trunc.getNode()->dump(&DAG);
969         dbgs() << '\n');
970   WorklistRemover DeadNodes(*this);
971   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
972   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
973   deleteAndRecombine(Load);
974   AddToWorklist(Trunc.getNode());
975 }
976 
977 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
978   Replace = false;
979   SDLoc DL(Op);
980   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
981     LoadSDNode *LD = cast<LoadSDNode>(Op);
982     EVT MemVT = LD->getMemoryVT();
983     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
984       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
985                                                        : ISD::EXTLOAD)
986       : LD->getExtensionType();
987     Replace = true;
988     return DAG.getExtLoad(ExtType, DL, PVT,
989                           LD->getChain(), LD->getBasePtr(),
990                           MemVT, LD->getMemOperand());
991   }
992 
993   unsigned Opc = Op.getOpcode();
994   switch (Opc) {
995   default: break;
996   case ISD::AssertSext:
997     return DAG.getNode(ISD::AssertSext, DL, PVT,
998                        SExtPromoteOperand(Op.getOperand(0), PVT),
999                        Op.getOperand(1));
1000   case ISD::AssertZext:
1001     return DAG.getNode(ISD::AssertZext, DL, PVT,
1002                        ZExtPromoteOperand(Op.getOperand(0), PVT),
1003                        Op.getOperand(1));
1004   case ISD::Constant: {
1005     unsigned ExtOpc =
1006       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1007     return DAG.getNode(ExtOpc, DL, PVT, Op);
1008   }
1009   }
1010 
1011   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1012     return SDValue();
1013   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1014 }
1015 
1016 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1017   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1018     return SDValue();
1019   EVT OldVT = Op.getValueType();
1020   SDLoc DL(Op);
1021   bool Replace = false;
1022   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1023   if (!NewOp.getNode())
1024     return SDValue();
1025   AddToWorklist(NewOp.getNode());
1026 
1027   if (Replace)
1028     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1029   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1030                      DAG.getValueType(OldVT));
1031 }
1032 
1033 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1034   EVT OldVT = Op.getValueType();
1035   SDLoc DL(Op);
1036   bool Replace = false;
1037   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1038   if (!NewOp.getNode())
1039     return SDValue();
1040   AddToWorklist(NewOp.getNode());
1041 
1042   if (Replace)
1043     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1044   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1045 }
1046 
1047 /// Promote the specified integer binary operation if the target indicates it is
1048 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1049 /// i32 since i16 instructions are longer.
1050 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1051   if (!LegalOperations)
1052     return SDValue();
1053 
1054   EVT VT = Op.getValueType();
1055   if (VT.isVector() || !VT.isInteger())
1056     return SDValue();
1057 
1058   // If operation type is 'undesirable', e.g. i16 on x86, consider
1059   // promoting it.
1060   unsigned Opc = Op.getOpcode();
1061   if (TLI.isTypeDesirableForOp(Opc, VT))
1062     return SDValue();
1063 
1064   EVT PVT = VT;
1065   // Consult target whether it is a good idea to promote this operation and
1066   // what's the right type to promote it to.
1067   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1068     assert(PVT != VT && "Don't know what type to promote to!");
1069 
1070     bool Replace0 = false;
1071     SDValue N0 = Op.getOperand(0);
1072     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1073     if (!NN0.getNode())
1074       return SDValue();
1075 
1076     bool Replace1 = false;
1077     SDValue N1 = Op.getOperand(1);
1078     SDValue NN1;
1079     if (N0 == N1)
1080       NN1 = NN0;
1081     else {
1082       NN1 = PromoteOperand(N1, PVT, Replace1);
1083       if (!NN1.getNode())
1084         return SDValue();
1085     }
1086 
1087     AddToWorklist(NN0.getNode());
1088     if (NN1.getNode())
1089       AddToWorklist(NN1.getNode());
1090 
1091     if (Replace0)
1092       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1093     if (Replace1)
1094       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1095 
1096     DEBUG(dbgs() << "\nPromoting ";
1097           Op.getNode()->dump(&DAG));
1098     SDLoc DL(Op);
1099     return DAG.getNode(ISD::TRUNCATE, DL, VT,
1100                        DAG.getNode(Opc, DL, PVT, NN0, NN1));
1101   }
1102   return SDValue();
1103 }
1104 
1105 /// Promote the specified integer shift operation if the target indicates it is
1106 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1107 /// i32 since i16 instructions are longer.
1108 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1109   if (!LegalOperations)
1110     return SDValue();
1111 
1112   EVT VT = Op.getValueType();
1113   if (VT.isVector() || !VT.isInteger())
1114     return SDValue();
1115 
1116   // If operation type is 'undesirable', e.g. i16 on x86, consider
1117   // promoting it.
1118   unsigned Opc = Op.getOpcode();
1119   if (TLI.isTypeDesirableForOp(Opc, VT))
1120     return SDValue();
1121 
1122   EVT PVT = VT;
1123   // Consult target whether it is a good idea to promote this operation and
1124   // what's the right type to promote it to.
1125   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1126     assert(PVT != VT && "Don't know what type to promote to!");
1127 
1128     bool Replace = false;
1129     SDValue N0 = Op.getOperand(0);
1130     if (Opc == ISD::SRA)
1131       N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
1132     else if (Opc == ISD::SRL)
1133       N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
1134     else
1135       N0 = PromoteOperand(N0, PVT, Replace);
1136     if (!N0.getNode())
1137       return SDValue();
1138 
1139     AddToWorklist(N0.getNode());
1140     if (Replace)
1141       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1142 
1143     DEBUG(dbgs() << "\nPromoting ";
1144           Op.getNode()->dump(&DAG));
1145     SDLoc DL(Op);
1146     return DAG.getNode(ISD::TRUNCATE, DL, VT,
1147                        DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1)));
1148   }
1149   return SDValue();
1150 }
1151 
1152 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1153   if (!LegalOperations)
1154     return SDValue();
1155 
1156   EVT VT = Op.getValueType();
1157   if (VT.isVector() || !VT.isInteger())
1158     return SDValue();
1159 
1160   // If operation type is 'undesirable', e.g. i16 on x86, consider
1161   // promoting it.
1162   unsigned Opc = Op.getOpcode();
1163   if (TLI.isTypeDesirableForOp(Opc, VT))
1164     return SDValue();
1165 
1166   EVT PVT = VT;
1167   // Consult target whether it is a good idea to promote this operation and
1168   // what's the right type to promote it to.
1169   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1170     assert(PVT != VT && "Don't know what type to promote to!");
1171     // fold (aext (aext x)) -> (aext x)
1172     // fold (aext (zext x)) -> (zext x)
1173     // fold (aext (sext x)) -> (sext x)
1174     DEBUG(dbgs() << "\nPromoting ";
1175           Op.getNode()->dump(&DAG));
1176     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1177   }
1178   return SDValue();
1179 }
1180 
1181 bool DAGCombiner::PromoteLoad(SDValue Op) {
1182   if (!LegalOperations)
1183     return false;
1184 
1185   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1186     return false;
1187 
1188   EVT VT = Op.getValueType();
1189   if (VT.isVector() || !VT.isInteger())
1190     return false;
1191 
1192   // If operation type is 'undesirable', e.g. i16 on x86, consider
1193   // promoting it.
1194   unsigned Opc = Op.getOpcode();
1195   if (TLI.isTypeDesirableForOp(Opc, VT))
1196     return false;
1197 
1198   EVT PVT = VT;
1199   // Consult target whether it is a good idea to promote this operation and
1200   // what's the right type to promote it to.
1201   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1202     assert(PVT != VT && "Don't know what type to promote to!");
1203 
1204     SDLoc DL(Op);
1205     SDNode *N = Op.getNode();
1206     LoadSDNode *LD = cast<LoadSDNode>(N);
1207     EVT MemVT = LD->getMemoryVT();
1208     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1209       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1210                                                        : ISD::EXTLOAD)
1211       : LD->getExtensionType();
1212     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1213                                    LD->getChain(), LD->getBasePtr(),
1214                                    MemVT, LD->getMemOperand());
1215     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1216 
1217     DEBUG(dbgs() << "\nPromoting ";
1218           N->dump(&DAG);
1219           dbgs() << "\nTo: ";
1220           Result.getNode()->dump(&DAG);
1221           dbgs() << '\n');
1222     WorklistRemover DeadNodes(*this);
1223     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1224     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1225     deleteAndRecombine(N);
1226     AddToWorklist(Result.getNode());
1227     return true;
1228   }
1229   return false;
1230 }
1231 
1232 /// \brief Recursively delete a node which has no uses and any operands for
1233 /// which it is the only use.
1234 ///
1235 /// Note that this both deletes the nodes and removes them from the worklist.
1236 /// It also adds any nodes who have had a user deleted to the worklist as they
1237 /// may now have only one use and subject to other combines.
1238 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1239   if (!N->use_empty())
1240     return false;
1241 
1242   SmallSetVector<SDNode *, 16> Nodes;
1243   Nodes.insert(N);
1244   do {
1245     N = Nodes.pop_back_val();
1246     if (!N)
1247       continue;
1248 
1249     if (N->use_empty()) {
1250       for (const SDValue &ChildN : N->op_values())
1251         Nodes.insert(ChildN.getNode());
1252 
1253       removeFromWorklist(N);
1254       DAG.DeleteNode(N);
1255     } else {
1256       AddToWorklist(N);
1257     }
1258   } while (!Nodes.empty());
1259   return true;
1260 }
1261 
1262 //===----------------------------------------------------------------------===//
1263 //  Main DAG Combiner implementation
1264 //===----------------------------------------------------------------------===//
1265 
/// Run the combiner over the whole DAG at combine level AtLevel.
///
/// Every node is put on the worklist; nodes are then popped one at a time.
/// A popped node is deleted if it is dead, re-legalized when running after
/// DAG legalization, and otherwise handed to combine(). When combine()
/// yields a different node, all uses of the old node are redirected to it
/// and the new node plus its users are queued again. The loop ends when the
/// worklist map is empty; the DAG root is then refreshed and dead nodes are
/// removed.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    // Every non-null worklist entry is expected to have a map entry; erase it
    // now that the node has been taken off the list.
    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Nodes reported as updated by legalization, and their users, get
      // (re)visited.
      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // LegalizeOp reported that N should not be processed any further.
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // A null result means no combine applied to N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    // With matching value counts the whole node is replaced; otherwise N must
    // be single-valued and its one value is replaced by RV.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1371 
/// Dispatch N to the visit routine for its opcode and return that routine's
/// result. Opcodes without a case below yield a null SDValue. Several
/// opcodes share one routine (SREM/UREM, the integer min/max family, and
/// ROTR/ROTL).
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCE:             return visitSETCCE(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  // No generic combine is registered for this opcode.
  return SDValue();
}
1470 
1471 SDValue DAGCombiner::combine(SDNode *N) {
1472   SDValue RV = visit(N);
1473 
1474   // If nothing happened, try a target-specific DAG combine.
1475   if (!RV.getNode()) {
1476     assert(N->getOpcode() != ISD::DELETED_NODE &&
1477            "Node was deleted but visit returned NULL!");
1478 
1479     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1480         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1481 
1482       // Expose the DAG combiner to the target combiner impls.
1483       TargetLowering::DAGCombinerInfo
1484         DagCombineInfo(DAG, Level, false, this);
1485 
1486       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1487     }
1488   }
1489 
1490   // If nothing happened still, try promoting the operation.
1491   if (!RV.getNode()) {
1492     switch (N->getOpcode()) {
1493     default: break;
1494     case ISD::ADD:
1495     case ISD::SUB:
1496     case ISD::MUL:
1497     case ISD::AND:
1498     case ISD::OR:
1499     case ISD::XOR:
1500       RV = PromoteIntBinOp(SDValue(N, 0));
1501       break;
1502     case ISD::SHL:
1503     case ISD::SRA:
1504     case ISD::SRL:
1505       RV = PromoteIntShiftOp(SDValue(N, 0));
1506       break;
1507     case ISD::SIGN_EXTEND:
1508     case ISD::ZERO_EXTEND:
1509     case ISD::ANY_EXTEND:
1510       RV = PromoteExtend(SDValue(N, 0));
1511       break;
1512     case ISD::LOAD:
1513       if (PromoteLoad(SDValue(N, 0)))
1514         RV = SDValue(N, 0);
1515       break;
1516     }
1517   }
1518 
1519   // If N is a commutative binary node, try commuting it to enable more
1520   // sdisel CSE.
1521   if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
1522       N->getNumValues() == 1) {
1523     SDValue N0 = N->getOperand(0);
1524     SDValue N1 = N->getOperand(1);
1525 
1526     // Constant operands are canonicalized to RHS.
1527     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1528       SDValue Ops[] = {N1, N0};
1529       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1530                                             N->getFlags());
1531       if (CSENode)
1532         return SDValue(CSENode, 0);
1533     }
1534   }
1535 
1536   return RV;
1537 }
1538 
1539 /// Given a node, return its input chain if it has one, otherwise return a null
1540 /// sd operand.
1541 static SDValue getInputChainForNode(SDNode *N) {
1542   if (unsigned NumOps = N->getNumOperands()) {
1543     if (N->getOperand(0).getValueType() == MVT::Other)
1544       return N->getOperand(0);
1545     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1546       return N->getOperand(NumOps-1);
1547     for (unsigned i = 1; i < NumOps-1; ++i)
1548       if (N->getOperand(i).getValueType() == MVT::Other)
1549         return N->getOperand(i);
1550   }
1551   return SDValue();
1552 }
1553 
1554 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1555   // If N has two operands, where one has an input chain equal to the other,
1556   // the 'other' chain is redundant.
1557   if (N->getNumOperands() == 2) {
1558     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1559       return N->getOperand(0);
1560     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1561       return N->getOperand(1);
1562   }
1563 
1564   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1565   SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
1566   SmallPtrSet<SDNode*, 16> SeenOps;
1567   bool Changed = false;             // If we should replace this token factor.
1568 
1569   // Start out with this token factor.
1570   TFs.push_back(N);
1571 
1572   // Iterate through token factors.  The TFs grows when new token factors are
1573   // encountered.
1574   for (unsigned i = 0; i < TFs.size(); ++i) {
1575     SDNode *TF = TFs[i];
1576 
1577     // Check each of the operands.
1578     for (const SDValue &Op : TF->op_values()) {
1579 
1580       switch (Op.getOpcode()) {
1581       case ISD::EntryToken:
1582         // Entry tokens don't need to be added to the list. They are
1583         // redundant.
1584         Changed = true;
1585         break;
1586 
1587       case ISD::TokenFactor:
1588         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1589           // Queue up for processing.
1590           TFs.push_back(Op.getNode());
1591           // Clean up in case the token factor is removed.
1592           AddToWorklist(Op.getNode());
1593           Changed = true;
1594           break;
1595         }
1596         LLVM_FALLTHROUGH;
1597 
1598       default:
1599         // Only add if it isn't already in the list.
1600         if (SeenOps.insert(Op.getNode()).second)
1601           Ops.push_back(Op);
1602         else
1603           Changed = true;
1604         break;
1605       }
1606     }
1607   }
1608 
1609   SDValue Result;
1610 
1611   // If we've changed things around then replace token factor.
1612   if (Changed) {
1613     if (Ops.empty()) {
1614       // The entry token is the only possible outcome.
1615       Result = DAG.getEntryNode();
1616     } else {
1617       // New and improved token factor.
1618       Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1619     }
1620 
1621     // Add users to worklist if AA is enabled, since it may introduce
1622     // a lot of new chained token factors while removing memory deps.
1623     bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
1624       : DAG.getSubtarget().useAA();
1625     return CombineTo(N, Result, UseAA /*add to worklist*/);
1626   }
1627 
1628   return Result;
1629 }
1630 
1631 /// MERGE_VALUES can always be eliminated.
1632 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1633   WorklistRemover DeadNodes(*this);
1634   // Replacing results may cause a different MERGE_VALUES to suddenly
1635   // be CSE'd with N, and carry its uses with it. Iterate until no
1636   // uses remain, to ensure that the node can be safely deleted.
1637   // First add the users of this node to the work list so that they
1638   // can be tried again once they have new operands.
1639   AddUsersToWorklist(N);
1640   do {
1641     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1642       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1643   } while (!N->use_empty());
1644   deleteAndRecombine(N);
1645   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1646 }
1647 
1648 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1649 /// ConstantSDNode pointer else nullptr.
1650 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1651   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1652   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1653 }
1654 
1655 SDValue DAGCombiner::visitADD(SDNode *N) {
1656   SDValue N0 = N->getOperand(0);
1657   SDValue N1 = N->getOperand(1);
1658   EVT VT = N0.getValueType();
1659   SDLoc DL(N);
1660 
1661   // fold vector ops
1662   if (VT.isVector()) {
1663     if (SDValue FoldedVOp = SimplifyVBinOp(N))
1664       return FoldedVOp;
1665 
1666     // fold (add x, 0) -> x, vector edition
1667     if (ISD::isBuildVectorAllZeros(N1.getNode()))
1668       return N0;
1669     if (ISD::isBuildVectorAllZeros(N0.getNode()))
1670       return N1;
1671   }
1672 
1673   // fold (add x, undef) -> undef
1674   if (N0.isUndef())
1675     return N0;
1676 
1677   if (N1.isUndef())
1678     return N1;
1679 
1680   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
1681     // canonicalize constant to RHS
1682     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
1683       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
1684     // fold (add c1, c2) -> c1+c2
1685     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
1686                                       N1.getNode());
1687   }
1688 
1689   // fold (add x, 0) -> x
1690   if (isNullConstant(N1))
1691     return N0;
1692 
1693   // fold ((c1-A)+c2) -> (c1+c2)-A
1694   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
1695     if (N0.getOpcode() == ISD::SUB)
1696       if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
1697         return DAG.getNode(ISD::SUB, DL, VT,
1698                            DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
1699                            N0.getOperand(1));
1700       }
1701   }
1702 
1703   // reassociate add
1704   if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
1705     return RADD;
1706 
1707   // fold ((0-A) + B) -> B-A
1708   if (N0.getOpcode() == ISD::SUB &&
1709       isNullConstantOrNullSplatConstant(N0.getOperand(0)))
1710     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
1711 
1712   // fold (A + (0-B)) -> A-B
1713   if (N1.getOpcode() == ISD::SUB &&
1714       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
1715     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
1716 
1717   // fold (A+(B-A)) -> B
1718   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
1719     return N1.getOperand(0);
1720 
1721   // fold ((B-A)+A) -> B
1722   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
1723     return N0.getOperand(0);
1724 
1725   // fold (A+(B-(A+C))) to (B-C)
1726   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1727       N0 == N1.getOperand(1).getOperand(0))
1728     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1729                        N1.getOperand(1).getOperand(1));
1730 
1731   // fold (A+(B-(C+A))) to (B-C)
1732   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1733       N0 == N1.getOperand(1).getOperand(1))
1734     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1735                        N1.getOperand(1).getOperand(0));
1736 
1737   // fold (A+((B-A)+or-C)) to (B+or-C)
1738   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
1739       N1.getOperand(0).getOpcode() == ISD::SUB &&
1740       N0 == N1.getOperand(0).getOperand(1))
1741     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
1742                        N1.getOperand(1));
1743 
1744   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
1745   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
1746     SDValue N00 = N0.getOperand(0);
1747     SDValue N01 = N0.getOperand(1);
1748     SDValue N10 = N1.getOperand(0);
1749     SDValue N11 = N1.getOperand(1);
1750 
1751     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
1752       return DAG.getNode(ISD::SUB, DL, VT,
1753                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
1754                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
1755   }
1756 
1757   if (SimplifyDemandedBits(SDValue(N, 0)))
1758     return SDValue(N, 0);
1759 
1760   // fold (a+b) -> (a|b) iff a and b share no bits.
1761   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
1762       VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
1763     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
1764 
1765   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
1766   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
1767       isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
1768     return DAG.getNode(ISD::SUB, DL, VT, N0,
1769                        DAG.getNode(ISD::SHL, DL, VT,
1770                                    N1.getOperand(0).getOperand(1),
1771                                    N1.getOperand(1)));
1772   if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
1773       isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
1774     return DAG.getNode(ISD::SUB, DL, VT, N1,
1775                        DAG.getNode(ISD::SHL, DL, VT,
1776                                    N0.getOperand(0).getOperand(1),
1777                                    N0.getOperand(1)));
1778 
1779   if (N1.getOpcode() == ISD::AND) {
1780     SDValue AndOp0 = N1.getOperand(0);
1781     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
1782     unsigned DestBits = VT.getScalarSizeInBits();
1783 
1784     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
1785     // and similar xforms where the inner op is either ~0 or 0.
1786     if (NumSignBits == DestBits &&
1787         isOneConstantOrOneSplatConstant(N1->getOperand(1)))
1788       return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
1789   }
1790 
1791   // add (sext i1), X -> sub X, (zext i1)
1792   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
1793       N0.getOperand(0).getValueType() == MVT::i1 &&
1794       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
1795     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
1796     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
1797   }
1798 
1799   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
1800   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
1801     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
1802     if (TN->getVT() == MVT::i1) {
1803       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
1804                                  DAG.getConstant(1, DL, VT));
1805       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
1806     }
1807   }
1808 
1809   return SDValue();
1810 }
1811 
1812 SDValue DAGCombiner::visitADDC(SDNode *N) {
1813   SDValue N0 = N->getOperand(0);
1814   SDValue N1 = N->getOperand(1);
1815   EVT VT = N0.getValueType();
1816 
1817   // If the flag result is dead, turn this into an ADD.
1818   if (!N->hasAnyUseOfValue(1))
1819     return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
1820                      DAG.getNode(ISD::CARRY_FALSE,
1821                                  SDLoc(N), MVT::Glue));
1822 
1823   // canonicalize constant to RHS.
1824   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1825   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1826   if (N0C && !N1C)
1827     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
1828 
1829   // fold (addc x, 0) -> x + no carry out
1830   if (isNullConstant(N1))
1831     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
1832                                         SDLoc(N), MVT::Glue));
1833 
1834   // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
1835   APInt LHSZero, LHSOne;
1836   APInt RHSZero, RHSOne;
1837   DAG.computeKnownBits(N0, LHSZero, LHSOne);
1838 
1839   if (LHSZero.getBoolValue()) {
1840     DAG.computeKnownBits(N1, RHSZero, RHSOne);
1841 
1842     // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
1843     // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
1844     if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
1845       return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
1846                        DAG.getNode(ISD::CARRY_FALSE,
1847                                    SDLoc(N), MVT::Glue));
1848   }
1849 
1850   return SDValue();
1851 }
1852 
1853 SDValue DAGCombiner::visitADDE(SDNode *N) {
1854   SDValue N0 = N->getOperand(0);
1855   SDValue N1 = N->getOperand(1);
1856   SDValue CarryIn = N->getOperand(2);
1857 
1858   // canonicalize constant to RHS
1859   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1860   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1861   if (N0C && !N1C)
1862     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
1863                        N1, N0, CarryIn);
1864 
1865   // fold (adde x, y, false) -> (addc x, y)
1866   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
1867     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
1868 
1869   return SDValue();
1870 }
1871 
1872 // Since it may not be valid to emit a fold to zero for vector initializers
1873 // check if we can before folding.
1874 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
1875                              SelectionDAG &DAG, bool LegalOperations,
1876                              bool LegalTypes) {
1877   if (!VT.isVector())
1878     return DAG.getConstant(0, DL, VT);
1879   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
1880     return DAG.getConstant(0, DL, VT);
1881   return SDValue();
1882 }
1883 
1884 SDValue DAGCombiner::visitSUB(SDNode *N) {
1885   SDValue N0 = N->getOperand(0);
1886   SDValue N1 = N->getOperand(1);
1887   EVT VT = N0.getValueType();
1888   SDLoc DL(N);
1889 
1890   // fold vector ops
1891   if (VT.isVector()) {
1892     if (SDValue FoldedVOp = SimplifyVBinOp(N))
1893       return FoldedVOp;
1894 
1895     // fold (sub x, 0) -> x, vector edition
1896     if (ISD::isBuildVectorAllZeros(N1.getNode()))
1897       return N0;
1898   }
1899 
1900   // fold (sub x, x) -> 0
1901   // FIXME: Refactor this and xor and other similar operations together.
1902   if (N0 == N1)
1903     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
1904   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
1905       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1906     // fold (sub c1, c2) -> c1-c2
1907     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
1908                                       N1.getNode());
1909   }
1910 
1911   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
1912 
1913   // fold (sub x, c) -> (add x, -c)
1914   if (N1C) {
1915     return DAG.getNode(ISD::ADD, DL, VT, N0,
1916                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
1917   }
1918 
1919   // Right-shifting everything out but the sign bit followed by negation is the
1920   // same as flipping arithmetic/logical shift type without the negation:
1921   // -(X >>u 31) -> (X >>s 31)
1922   // -(X >>s 31) -> (X >>u 31)
1923   if (isNullConstantOrNullSplatConstant(N0) &&
1924       (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL)) {
1925     ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
1926     if (ShiftAmt && ShiftAmt->getZExtValue() == VT.getScalarSizeInBits() - 1) {
1927       auto NewOpc = N1->getOpcode() == ISD::SRA ? ISD::SRL :ISD::SRA;
1928       if (!LegalOperations || TLI.isOperationLegal(NewOpc, VT))
1929         return DAG.getNode(NewOpc, DL, VT, N1.getOperand(0), N1.getOperand(1));
1930     }
1931   }
1932 
1933   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
1934   if (isAllOnesConstantOrAllOnesSplatConstant(N0))
1935     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
1936 
1937   // fold A-(A-B) -> B
1938   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
1939     return N1.getOperand(1);
1940 
1941   // fold (A+B)-A -> B
1942   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
1943     return N0.getOperand(1);
1944 
1945   // fold (A+B)-B -> A
1946   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
1947     return N0.getOperand(0);
1948 
1949   // fold C2-(A+C1) -> (C2-C1)-A
1950   if (N1.getOpcode() == ISD::ADD) {
1951     SDValue N11 = N1.getOperand(1);
1952     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
1953         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
1954       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
1955       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
1956     }
1957   }
1958 
1959   // fold ((A+(B+or-C))-B) -> A+or-C
1960   if (N0.getOpcode() == ISD::ADD &&
1961       (N0.getOperand(1).getOpcode() == ISD::SUB ||
1962        N0.getOperand(1).getOpcode() == ISD::ADD) &&
1963       N0.getOperand(1).getOperand(0) == N1)
1964     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
1965                        N0.getOperand(1).getOperand(1));
1966 
1967   // fold ((A+(C+B))-B) -> A+C
1968   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
1969       N0.getOperand(1).getOperand(1) == N1)
1970     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
1971                        N0.getOperand(1).getOperand(0));
1972 
1973   // fold ((A-(B-C))-C) -> A-B
1974   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
1975       N0.getOperand(1).getOperand(1) == N1)
1976     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
1977                        N0.getOperand(1).getOperand(0));
1978 
1979   // If either operand of a sub is undef, the result is undef
1980   if (N0.isUndef())
1981     return N0;
1982   if (N1.isUndef())
1983     return N1;
1984 
1985   // If the relocation model supports it, consider symbol offsets.
1986   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
1987     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
1988       // fold (sub Sym, c) -> Sym-c
1989       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
1990         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
1991                                     GA->getOffset() -
1992                                         (uint64_t)N1C->getSExtValue());
1993       // fold (sub Sym+c1, Sym+c2) -> c1-c2
1994       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
1995         if (GA->getGlobal() == GB->getGlobal())
1996           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
1997                                  DL, VT);
1998     }
1999 
2000   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2001   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2002     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2003     if (TN->getVT() == MVT::i1) {
2004       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2005                                  DAG.getConstant(1, DL, VT));
2006       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2007     }
2008   }
2009 
2010   return SDValue();
2011 }
2012 
2013 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2014   SDValue N0 = N->getOperand(0);
2015   SDValue N1 = N->getOperand(1);
2016   EVT VT = N0.getValueType();
2017   SDLoc DL(N);
2018 
2019   // If the flag result is dead, turn this into an SUB.
2020   if (!N->hasAnyUseOfValue(1))
2021     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2022                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2023 
2024   // fold (subc x, x) -> 0 + no borrow
2025   if (N0 == N1)
2026     return CombineTo(N, DAG.getConstant(0, DL, VT),
2027                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2028 
2029   // fold (subc x, 0) -> x + no borrow
2030   if (isNullConstant(N1))
2031     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2032 
2033   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2034   if (isAllOnesConstant(N0))
2035     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2036                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2037 
2038   return SDValue();
2039 }
2040 
2041 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2042   SDValue N0 = N->getOperand(0);
2043   SDValue N1 = N->getOperand(1);
2044   SDValue CarryIn = N->getOperand(2);
2045 
2046   // fold (sube x, y, false) -> (subc x, y)
2047   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2048     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2049 
2050   return SDValue();
2051 }
2052 
2053 SDValue DAGCombiner::visitMUL(SDNode *N) {
2054   SDValue N0 = N->getOperand(0);
2055   SDValue N1 = N->getOperand(1);
2056   EVT VT = N0.getValueType();
2057 
2058   // fold (mul x, undef) -> 0
2059   if (N0.isUndef() || N1.isUndef())
2060     return DAG.getConstant(0, SDLoc(N), VT);
2061 
2062   bool N0IsConst = false;
2063   bool N1IsConst = false;
2064   bool N1IsOpaqueConst = false;
2065   bool N0IsOpaqueConst = false;
2066   APInt ConstValue0, ConstValue1;
2067   // fold vector ops
2068   if (VT.isVector()) {
2069     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2070       return FoldedVOp;
2071 
2072     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2073     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2074   } else {
2075     N0IsConst = isa<ConstantSDNode>(N0);
2076     if (N0IsConst) {
2077       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2078       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2079     }
2080     N1IsConst = isa<ConstantSDNode>(N1);
2081     if (N1IsConst) {
2082       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2083       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2084     }
2085   }
2086 
2087   // fold (mul c1, c2) -> c1*c2
2088   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2089     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2090                                       N0.getNode(), N1.getNode());
2091 
2092   // canonicalize constant to RHS (vector doesn't have to splat)
2093   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2094      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2095     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2096   // fold (mul x, 0) -> 0
2097   if (N1IsConst && ConstValue1 == 0)
2098     return N1;
2099   // We require a splat of the entire scalar bit width for non-contiguous
2100   // bit patterns.
2101   bool IsFullSplat =
2102     ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
2103   // fold (mul x, 1) -> x
2104   if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
2105     return N0;
2106   // fold (mul x, -1) -> 0-x
2107   if (N1IsConst && ConstValue1.isAllOnesValue()) {
2108     SDLoc DL(N);
2109     return DAG.getNode(ISD::SUB, DL, VT,
2110                        DAG.getConstant(0, DL, VT), N0);
2111   }
2112   // fold (mul x, (1 << c)) -> x << c
2113   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
2114       IsFullSplat) {
2115     SDLoc DL(N);
2116     return DAG.getNode(ISD::SHL, DL, VT, N0,
2117                        DAG.getConstant(ConstValue1.logBase2(), DL,
2118                                        getShiftAmountTy(N0.getValueType())));
2119   }
2120   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2121   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
2122       IsFullSplat) {
2123     unsigned Log2Val = (-ConstValue1).logBase2();
2124     SDLoc DL(N);
2125     // FIXME: If the input is something that is easily negated (e.g. a
2126     // single-use add), we should put the negate there.
2127     return DAG.getNode(ISD::SUB, DL, VT,
2128                        DAG.getConstant(0, DL, VT),
2129                        DAG.getNode(ISD::SHL, DL, VT, N0,
2130                             DAG.getConstant(Log2Val, DL,
2131                                       getShiftAmountTy(N0.getValueType()))));
2132   }
2133 
2134   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2135   if (N0.getOpcode() == ISD::SHL &&
2136       isConstantOrConstantVector(N1) &&
2137       isConstantOrConstantVector(N0.getOperand(1))) {
2138     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2139     AddToWorklist(C3.getNode());
2140     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2141   }
2142 
2143   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2144   // use.
2145   {
2146     SDValue Sh(nullptr, 0), Y(nullptr, 0);
2147 
2148     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
2149     if (N0.getOpcode() == ISD::SHL &&
2150         isConstantOrConstantVector(N0.getOperand(1)) &&
2151         N0.getNode()->hasOneUse()) {
2152       Sh = N0; Y = N1;
2153     } else if (N1.getOpcode() == ISD::SHL &&
2154                isConstantOrConstantVector(N1.getOperand(1)) &&
2155                N1.getNode()->hasOneUse()) {
2156       Sh = N1; Y = N0;
2157     }
2158 
2159     if (Sh.getNode()) {
2160       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2161       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2162     }
2163   }
2164 
2165   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2166   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2167       N0.getOpcode() == ISD::ADD &&
2168       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
2169       isMulAddWithConstProfitable(N, N0, N1))
2170       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2171                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2172                                      N0.getOperand(0), N1),
2173                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2174                                      N0.getOperand(1), N1));
2175 
2176   // reassociate mul
2177   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2178     return RMUL;
2179 
2180   return SDValue();
2181 }
2182 
2183 /// Return true if divmod libcall is available.
2184 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2185                                      const TargetLowering &TLI) {
2186   RTLIB::Libcall LC;
2187   EVT NodeType = Node->getValueType(0);
2188   if (!NodeType.isSimple())
2189     return false;
2190   switch (NodeType.getSimpleVT().SimpleTy) {
2191   default: return false; // No libcall for vector types.
2192   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2193   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2194   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2195   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2196   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2197   }
2198 
2199   return TLI.getLibcallName(LC) != nullptr;
2200 }
2201 
2202 /// Issue divrem if both quotient and remainder are needed.
2203 SDValue DAGCombiner::useDivRem(SDNode *Node) {
2204   if (Node->use_empty())
2205     return SDValue(); // This is a dead node, leave it alone.
2206 
2207   unsigned Opcode = Node->getOpcode();
2208   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
2209   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
2210 
2211   // DivMod lib calls can still work on non-legal types if using lib-calls.
2212   EVT VT = Node->getValueType(0);
2213   if (VT.isVector() || !VT.isInteger())
2214     return SDValue();
2215 
2216   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
2217     return SDValue();
2218 
2219   // If DIVREM is going to get expanded into a libcall,
2220   // but there is no libcall available, then don't combine.
2221   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
2222       !isDivRemLibcallAvailable(Node, isSigned, TLI))
2223     return SDValue();
2224 
2225   // If div is legal, it's better to do the normal expansion
2226   unsigned OtherOpcode = 0;
2227   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
2228     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
2229     if (TLI.isOperationLegalOrCustom(Opcode, VT))
2230       return SDValue();
2231   } else {
2232     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2233     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
2234       return SDValue();
2235   }
2236 
2237   SDValue Op0 = Node->getOperand(0);
2238   SDValue Op1 = Node->getOperand(1);
2239   SDValue combined;
2240   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
2241          UE = Op0.getNode()->use_end(); UI != UE;) {
2242     SDNode *User = *UI++;
2243     if (User == Node || User->use_empty())
2244       continue;
2245     // Convert the other matching node(s), too;
2246     // otherwise, the DIVREM may get target-legalized into something
2247     // target-specific that we won't be able to recognize.
2248     unsigned UserOpc = User->getOpcode();
2249     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
2250         User->getOperand(0) == Op0 &&
2251         User->getOperand(1) == Op1) {
2252       if (!combined) {
2253         if (UserOpc == OtherOpcode) {
2254           SDVTList VTs = DAG.getVTList(VT, VT);
2255           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
2256         } else if (UserOpc == DivRemOpc) {
2257           combined = SDValue(User, 0);
2258         } else {
2259           assert(UserOpc == Opcode);
2260           continue;
2261         }
2262       }
2263       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
2264         CombineTo(User, combined);
2265       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
2266         CombineTo(User, combined.getValue(1));
2267     }
2268   }
2269   return combined;
2270 }
2271 
2272 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2273   SDValue N0 = N->getOperand(0);
2274   SDValue N1 = N->getOperand(1);
2275   EVT VT = N->getValueType(0);
2276 
2277   // fold vector ops
2278   if (VT.isVector())
2279     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2280       return FoldedVOp;
2281 
2282   SDLoc DL(N);
2283 
2284   // fold (sdiv c1, c2) -> c1/c2
2285   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2286   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2287   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2288     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2289   // fold (sdiv X, 1) -> X
2290   if (N1C && N1C->isOne())
2291     return N0;
2292   // fold (sdiv X, -1) -> 0-X
2293   if (N1C && N1C->isAllOnesValue())
2294     return DAG.getNode(ISD::SUB, DL, VT,
2295                        DAG.getConstant(0, DL, VT), N0);
2296 
2297   // If we know the sign bits of both operands are zero, strength reduce to a
2298   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
2299   if (!VT.isVector()) {
2300     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2301       return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2302   }
2303 
2304   // fold (sdiv X, pow2) -> simple ops after legalize
2305   // FIXME: We check for the exact bit here because the generic lowering gives
2306   // better results in that case. The target-specific lowering should learn how
2307   // to handle exact sdivs efficiently.
2308   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2309       !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
2310       (N1C->getAPIntValue().isPowerOf2() ||
2311        (-N1C->getAPIntValue()).isPowerOf2())) {
2312     // Target-specific implementation of sdiv x, pow2.
2313     if (SDValue Res = BuildSDIVPow2(N))
2314       return Res;
2315 
2316     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2317 
2318     // Splat the sign bit into the register
2319     SDValue SGN =
2320         DAG.getNode(ISD::SRA, DL, VT, N0,
2321                     DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2322                                     getShiftAmountTy(N0.getValueType())));
2323     AddToWorklist(SGN.getNode());
2324 
2325     // Add (N0 < 0) ? abs2 - 1 : 0;
2326     SDValue SRL =
2327         DAG.getNode(ISD::SRL, DL, VT, SGN,
2328                     DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2329                                     getShiftAmountTy(SGN.getValueType())));
2330     SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2331     AddToWorklist(SRL.getNode());
2332     AddToWorklist(ADD.getNode());    // Divide by pow2
2333     SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2334                   DAG.getConstant(lg2, DL,
2335                                   getShiftAmountTy(ADD.getValueType())));
2336 
2337     // If we're dividing by a positive value, we're done.  Otherwise, we must
2338     // negate the result.
2339     if (N1C->getAPIntValue().isNonNegative())
2340       return SRA;
2341 
2342     AddToWorklist(SRA.getNode());
2343     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2344   }
2345 
2346   // If integer divide is expensive and we satisfy the requirements, emit an
2347   // alternate sequence.  Targets may check function attributes for size/speed
2348   // trade-offs.
2349   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2350   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2351     if (SDValue Op = BuildSDIV(N))
2352       return Op;
2353 
2354   // sdiv, srem -> sdivrem
2355   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
2356   // Otherwise, we break the simplification logic in visitREM().
2357   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2358     if (SDValue DivRem = useDivRem(N))
2359         return DivRem;
2360 
2361   // undef / X -> 0
2362   if (N0.isUndef())
2363     return DAG.getConstant(0, DL, VT);
2364   // X / undef -> undef
2365   if (N1.isUndef())
2366     return N1;
2367 
2368   return SDValue();
2369 }
2370 
2371 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2372   SDValue N0 = N->getOperand(0);
2373   SDValue N1 = N->getOperand(1);
2374   EVT VT = N->getValueType(0);
2375 
2376   // fold vector ops
2377   if (VT.isVector())
2378     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2379       return FoldedVOp;
2380 
2381   SDLoc DL(N);
2382 
2383   // fold (udiv c1, c2) -> c1/c2
2384   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2385   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2386   if (N0C && N1C)
2387     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
2388                                                     N0C, N1C))
2389       return Folded;
2390 
2391   // fold (udiv x, (1 << c)) -> x >>u c
2392   if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
2393     return DAG.getNode(ISD::SRL, DL, VT, N0,
2394                        DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
2395                                        getShiftAmountTy(N0.getValueType())));
2396 
2397   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2398   if (N1.getOpcode() == ISD::SHL) {
2399     if (ConstantSDNode *SHC = isConstOrConstSplat(N1.getOperand(0))) {
2400       if (!SHC->isOpaque() && SHC->getAPIntValue().isPowerOf2()) {
2401         EVT ADDVT = N1.getOperand(1).getValueType();
2402         SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
2403                                   N1.getOperand(1),
2404                                   DAG.getConstant(SHC->getAPIntValue()
2405                                                                   .logBase2(),
2406                                                   DL, ADDVT));
2407         AddToWorklist(Add.getNode());
2408         return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
2409       }
2410     }
2411   }
2412 
2413   // fold (udiv x, c) -> alternate
2414   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2415   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2416     if (SDValue Op = BuildUDIV(N))
2417       return Op;
2418 
2419   // sdiv, srem -> sdivrem
2420   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
2421   // Otherwise, we break the simplification logic in visitREM().
2422   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2423     if (SDValue DivRem = useDivRem(N))
2424         return DivRem;
2425 
2426   // undef / X -> 0
2427   if (N0.isUndef())
2428     return DAG.getConstant(0, DL, VT);
2429   // X / undef -> undef
2430   if (N1.isUndef())
2431     return N1;
2432 
2433   return SDValue();
2434 }
2435 
2436 // handles ISD::SREM and ISD::UREM
2437 SDValue DAGCombiner::visitREM(SDNode *N) {
2438   unsigned Opcode = N->getOpcode();
2439   SDValue N0 = N->getOperand(0);
2440   SDValue N1 = N->getOperand(1);
2441   EVT VT = N->getValueType(0);
2442   bool isSigned = (Opcode == ISD::SREM);
2443   SDLoc DL(N);
2444 
2445   // fold (rem c1, c2) -> c1%c2
2446   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2447   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2448   if (N0C && N1C)
2449     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
2450       return Folded;
2451 
2452   if (isSigned) {
2453     // If we know the sign bits of both operands are zero, strength reduce to a
2454     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
2455     if (!VT.isVector()) {
2456       if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2457         return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
2458     }
2459   } else {
2460     // fold (urem x, pow2) -> (and x, pow2-1)
2461     if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2462         N1C->getAPIntValue().isPowerOf2()) {
2463       return DAG.getNode(ISD::AND, DL, VT, N0,
2464                          DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
2465     }
2466     // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
2467     if (N1.getOpcode() == ISD::SHL) {
2468       ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0));
2469       if (SHC && SHC->getAPIntValue().isPowerOf2()) {
2470         APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits());
2471         SDValue Add =
2472             DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
2473         AddToWorklist(Add.getNode());
2474         return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2475       }
2476     }
2477   }
2478 
2479   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2480 
2481   // If X/C can be simplified by the division-by-constant logic, lower
2482   // X%C to the equivalent of X-X/C*C.
2483   // To avoid mangling nodes, this simplification requires that the combine()
2484   // call for the speculative DIV must not cause a DIVREM conversion.  We guard
2485   // against this by skipping the simplification if isIntDivCheap().  When
2486   // div is not cheap, combine will not return a DIVREM.  Regardless,
2487   // checking cheapness here makes sense since the simplification results in
2488   // fatter code.
2489   if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
2490     unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2491     SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
2492     AddToWorklist(Div.getNode());
2493     SDValue OptimizedDiv = combine(Div.getNode());
2494     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2495       assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
2496              (OptimizedDiv.getOpcode() != ISD::SDIVREM));
2497       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
2498       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
2499       AddToWorklist(Mul.getNode());
2500       return Sub;
2501     }
2502   }
2503 
2504   // sdiv, srem -> sdivrem
2505   if (SDValue DivRem = useDivRem(N))
2506     return DivRem.getValue(1);
2507 
2508   // undef % X -> 0
2509   if (N0.isUndef())
2510     return DAG.getConstant(0, DL, VT);
2511   // X % undef -> undef
2512   if (N1.isUndef())
2513     return N1;
2514 
2515   return SDValue();
2516 }
2517 
2518 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2519   SDValue N0 = N->getOperand(0);
2520   SDValue N1 = N->getOperand(1);
2521   EVT VT = N->getValueType(0);
2522   SDLoc DL(N);
2523 
2524   // fold (mulhs x, 0) -> 0
2525   if (isNullConstant(N1))
2526     return N1;
2527   // fold (mulhs x, 1) -> (sra x, size(x)-1)
2528   if (isOneConstant(N1)) {
2529     SDLoc DL(N);
2530     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
2531                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
2532                                        getShiftAmountTy(N0.getValueType())));
2533   }
2534   // fold (mulhs x, undef) -> 0
2535   if (N0.isUndef() || N1.isUndef())
2536     return DAG.getConstant(0, SDLoc(N), VT);
2537 
2538   // If the type twice as wide is legal, transform the mulhs to a wider multiply
2539   // plus a shift.
2540   if (VT.isSimple() && !VT.isVector()) {
2541     MVT Simple = VT.getSimpleVT();
2542     unsigned SimpleSize = Simple.getSizeInBits();
2543     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2544     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2545       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
2546       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
2547       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2548       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2549             DAG.getConstant(SimpleSize, DL,
2550                             getShiftAmountTy(N1.getValueType())));
2551       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2552     }
2553   }
2554 
2555   return SDValue();
2556 }
2557 
2558 SDValue DAGCombiner::visitMULHU(SDNode *N) {
2559   SDValue N0 = N->getOperand(0);
2560   SDValue N1 = N->getOperand(1);
2561   EVT VT = N->getValueType(0);
2562   SDLoc DL(N);
2563 
2564   // fold (mulhu x, 0) -> 0
2565   if (isNullConstant(N1))
2566     return N1;
2567   // fold (mulhu x, 1) -> 0
2568   if (isOneConstant(N1))
2569     return DAG.getConstant(0, DL, N0.getValueType());
2570   // fold (mulhu x, undef) -> 0
2571   if (N0.isUndef() || N1.isUndef())
2572     return DAG.getConstant(0, DL, VT);
2573 
2574   // If the type twice as wide is legal, transform the mulhu to a wider multiply
2575   // plus a shift.
2576   if (VT.isSimple() && !VT.isVector()) {
2577     MVT Simple = VT.getSimpleVT();
2578     unsigned SimpleSize = Simple.getSizeInBits();
2579     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2580     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2581       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
2582       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
2583       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2584       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2585             DAG.getConstant(SimpleSize, DL,
2586                             getShiftAmountTy(N1.getValueType())));
2587       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2588     }
2589   }
2590 
2591   return SDValue();
2592 }
2593 
2594 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
2595 /// give the opcodes for the two computations that are being performed. Return
2596 /// true if a simplification was made.
2597 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
2598                                                 unsigned HiOp) {
2599   // If the high half is not needed, just compute the low half.
2600   bool HiExists = N->hasAnyUseOfValue(1);
2601   if (!HiExists &&
2602       (!LegalOperations ||
2603        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
2604     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2605     return CombineTo(N, Res, Res);
2606   }
2607 
2608   // If the low half is not needed, just compute the high half.
2609   bool LoExists = N->hasAnyUseOfValue(0);
2610   if (!LoExists &&
2611       (!LegalOperations ||
2612        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
2613     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2614     return CombineTo(N, Res, Res);
2615   }
2616 
2617   // If both halves are used, return as it is.
2618   if (LoExists && HiExists)
2619     return SDValue();
2620 
2621   // If the two computed results can be simplified separately, separate them.
2622   if (LoExists) {
2623     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2624     AddToWorklist(Lo.getNode());
2625     SDValue LoOpt = combine(Lo.getNode());
2626     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
2627         (!LegalOperations ||
2628          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
2629       return CombineTo(N, LoOpt, LoOpt);
2630   }
2631 
2632   if (HiExists) {
2633     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2634     AddToWorklist(Hi.getNode());
2635     SDValue HiOpt = combine(Hi.getNode());
2636     if (HiOpt.getNode() && HiOpt != Hi &&
2637         (!LegalOperations ||
2638          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
2639       return CombineTo(N, HiOpt, HiOpt);
2640   }
2641 
2642   return SDValue();
2643 }
2644 
2645 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
2646   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
2647     return Res;
2648 
2649   EVT VT = N->getValueType(0);
2650   SDLoc DL(N);
2651 
2652   // If the type is twice as wide is legal, transform the mulhu to a wider
2653   // multiply plus a shift.
2654   if (VT.isSimple() && !VT.isVector()) {
2655     MVT Simple = VT.getSimpleVT();
2656     unsigned SimpleSize = Simple.getSizeInBits();
2657     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2658     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2659       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
2660       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
2661       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2662       // Compute the high part as N1.
2663       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2664             DAG.getConstant(SimpleSize, DL,
2665                             getShiftAmountTy(Lo.getValueType())));
2666       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2667       // Compute the low part as N0.
2668       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2669       return CombineTo(N, Lo, Hi);
2670     }
2671   }
2672 
2673   return SDValue();
2674 }
2675 
2676 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
2677   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
2678     return Res;
2679 
2680   EVT VT = N->getValueType(0);
2681   SDLoc DL(N);
2682 
2683   // If the type is twice as wide is legal, transform the mulhu to a wider
2684   // multiply plus a shift.
2685   if (VT.isSimple() && !VT.isVector()) {
2686     MVT Simple = VT.getSimpleVT();
2687     unsigned SimpleSize = Simple.getSizeInBits();
2688     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2689     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2690       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
2691       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
2692       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2693       // Compute the high part as N1.
2694       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2695             DAG.getConstant(SimpleSize, DL,
2696                             getShiftAmountTy(Lo.getValueType())));
2697       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2698       // Compute the low part as N0.
2699       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2700       return CombineTo(N, Lo, Hi);
2701     }
2702   }
2703 
2704   return SDValue();
2705 }
2706 
2707 SDValue DAGCombiner::visitSMULO(SDNode *N) {
2708   // (smulo x, 2) -> (saddo x, x)
2709   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2710     if (C2->getAPIntValue() == 2)
2711       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
2712                          N->getOperand(0), N->getOperand(0));
2713 
2714   return SDValue();
2715 }
2716 
2717 SDValue DAGCombiner::visitUMULO(SDNode *N) {
2718   // (umulo x, 2) -> (uaddo x, x)
2719   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2720     if (C2->getAPIntValue() == 2)
2721       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
2722                          N->getOperand(0), N->getOperand(0));
2723 
2724   return SDValue();
2725 }
2726 
2727 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
2728   SDValue N0 = N->getOperand(0);
2729   SDValue N1 = N->getOperand(1);
2730   EVT VT = N0.getValueType();
2731 
2732   // fold vector ops
2733   if (VT.isVector())
2734     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2735       return FoldedVOp;
2736 
2737   // fold (add c1, c2) -> c1+c2
2738   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
2739   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2740   if (N0C && N1C)
2741     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
2742 
2743   // canonicalize constant to RHS
2744   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2745      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2746     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
2747 
2748   return SDValue();
2749 }
2750 
/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it by applying N's operation to the inner operands and then
/// re-applying the shared operand opcode on top of the result.
///
/// \p N is the binary node; both of its operands must have the same opcode
/// (asserted). Returns the replacement value, or a null SDValue when none of
/// the transforms apply.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply: every fold below looks at
  // operand 0 of N0.
  if (N0.getNode()->getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  //
  // Both inner operands must have the same type, and once operations are
  // being legalized the narrower/wider OP must itself be legal.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    // Despite its name, ORNode uses N's own opcode, not necessarily ISD::OR.
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  // This requires both operands to share the same second operand z.
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    // Despite its name, ORNode uses N's own opcode, not necessarily ISD::OR.
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      // Apply N's operation on the source type, then redo the
      // bitcast/scalar_to_vector on the result.
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      // ShOp is the operand kept as-is in the new shuffle; first consider the
      // case where the second shuffle operand is the shared one.
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // (For XOR the shared operand cancels out, so it is replaced by a zero
      // vector before types are legalized and the fold is disabled after.)
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    SVN0->getMask());
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // Same handling as above, now for a shared first shuffle operand.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    SVN0->getMask());
      }
    }
  }

  return SDValue();
}
2903 
2904 /// This contains all DAGCombine rules which reduce two values combined by
2905 /// an And operation to a single value. This makes them reusable in the context
2906 /// of visitSELECT(). Rules involving constants are not included as
2907 /// visitSELECT() already handles those cases.
2908 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
2909                                   SDNode *LocReference) {
2910   EVT VT = N1.getValueType();
2911 
2912   // fold (and x, undef) -> 0
2913   if (N0.isUndef() || N1.isUndef())
2914     return DAG.getConstant(0, SDLoc(LocReference), VT);
2915   // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
2916   SDValue LL, LR, RL, RR, CC0, CC1;
2917   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
2918     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
2919     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
2920 
2921     if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
2922         LL.getValueType().isInteger()) {
2923       // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
2924       if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
2925         EVT CCVT = getSetCCResultType(LR.getValueType());
2926         if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2927           SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
2928                                        LR.getValueType(), LL, RL);
2929           AddToWorklist(ORNode.getNode());
2930           return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
2931         }
2932       }
2933       if (isAllOnesConstant(LR)) {
2934         // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
2935         if (Op1 == ISD::SETEQ) {
2936           EVT CCVT = getSetCCResultType(LR.getValueType());
2937           if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2938             SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
2939                                           LR.getValueType(), LL, RL);
2940             AddToWorklist(ANDNode.getNode());
2941             return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
2942           }
2943         }
2944         // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
2945         if (Op1 == ISD::SETGT) {
2946           EVT CCVT = getSetCCResultType(LR.getValueType());
2947           if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2948             SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
2949                                          LR.getValueType(), LL, RL);
2950             AddToWorklist(ORNode.getNode());
2951             return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
2952           }
2953         }
2954       }
2955     }
2956     // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
2957     if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
2958         Op0 == Op1 && LL.getValueType().isInteger() &&
2959       Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
2960                             (isAllOnesConstant(LR) && isNullConstant(RR)))) {
2961       EVT CCVT = getSetCCResultType(LL.getValueType());
2962       if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2963         SDLoc DL(N0);
2964         SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
2965                                       LL, DAG.getConstant(1, DL,
2966                                                           LL.getValueType()));
2967         AddToWorklist(ADDNode.getNode());
2968         return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
2969                             DAG.getConstant(2, DL, LL.getValueType()),
2970                             ISD::SETUGE);
2971       }
2972     }
2973     // canonicalize equivalent to ll == rl
2974     if (LL == RR && LR == RL) {
2975       Op1 = ISD::getSetCCSwappedOperands(Op1);
2976       std::swap(RL, RR);
2977     }
2978     if (LL == RL && LR == RR) {
2979       bool isInteger = LL.getValueType().isInteger();
2980       ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
2981       if (Result != ISD::SETCC_INVALID &&
2982           (!LegalOperations ||
2983            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
2984             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
2985         EVT CCVT = getSetCCResultType(LL.getValueType());
2986         if (N0.getValueType() == CCVT ||
2987             (!LegalOperations && N0.getValueType() == MVT::i1))
2988           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
2989                               LL, LR, Result);
2990       }
2991     }
2992   }
2993 
2994   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
2995       VT.getSizeInBits() <= 64) {
2996     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2997       APInt ADDC = ADDI->getAPIntValue();
2998       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
2999         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3000         // immediate for an add, but it is legal if its top c2 bits are set,
3001         // transform the ADD so the immediate doesn't need to be materialized
3002         // in a register.
3003         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3004           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3005                                              SRLI->getZExtValue());
3006           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3007             ADDC |= Mask;
3008             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3009               SDLoc DL(N0);
3010               SDValue NewAdd =
3011                 DAG.getNode(ISD::ADD, DL, VT,
3012                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3013               CombineTo(N0.getNode(), NewAdd);
3014               // Return N so it doesn't get rechecked!
3015               return SDValue(LocReference, 0);
3016             }
3017           }
3018         }
3019       }
3020     }
3021   }
3022 
3023   // Reduce bit extract of low half of an integer to the narrower type.
3024   // (and (srl i64:x, K), KMask) ->
3025   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3026   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3027     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3028       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3029         unsigned Size = VT.getSizeInBits();
3030         const APInt &AndMask = CAnd->getAPIntValue();
3031         unsigned ShiftBits = CShift->getZExtValue();
3032         unsigned MaskBits = AndMask.countTrailingOnes();
3033         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3034 
3035         if (APIntOps::isMask(AndMask) &&
3036             // Required bits must not span the two halves of the integer and
3037             // must fit in the half size type.
3038             (ShiftBits + MaskBits <= Size / 2) &&
3039             TLI.isNarrowingProfitable(VT, HalfVT) &&
3040             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3041             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3042             TLI.isTruncateFree(VT, HalfVT) &&
3043             TLI.isZExtFree(HalfVT, VT)) {
3044           // The isNarrowingProfitable is to avoid regressions on PPC and
3045           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3046           // on downstream users of this. Those patterns could probably be
3047           // extended to handle extensions mixed in.
3048 
3049           SDValue SL(N0);
3050           assert(ShiftBits != 0 && MaskBits <= Size);
3051 
3052           // Extracting the highest bit of the low half.
3053           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3054           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3055                                       N0.getOperand(0));
3056 
3057           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3058           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3059           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3060           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3061           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3062         }
3063       }
3064     }
3065   }
3066 
3067   return SDValue();
3068 }
3069 
3070 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3071                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3072                                    bool &NarrowLoad) {
3073   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
3074 
3075   if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
3076     return false;
3077 
3078   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3079   LoadedVT = LoadN->getMemoryVT();
3080 
3081   if (ExtVT == LoadedVT &&
3082       (!LegalOperations ||
3083        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3084     // ZEXTLOAD will match without needing to change the size of the value being
3085     // loaded.
3086     NarrowLoad = false;
3087     return true;
3088   }
3089 
3090   // Do not change the width of a volatile load.
3091   if (LoadN->isVolatile())
3092     return false;
3093 
3094   // Do not generate loads of non-round integer types since these can
3095   // be expensive (and would be wrong if the type is not byte sized).
3096   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3097     return false;
3098 
3099   if (LegalOperations &&
3100       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3101     return false;
3102 
3103   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3104     return false;
3105 
3106   NarrowLoad = true;
3107   return true;
3108 }
3109 
3110 SDValue DAGCombiner::visitAND(SDNode *N) {
3111   SDValue N0 = N->getOperand(0);
3112   SDValue N1 = N->getOperand(1);
3113   EVT VT = N1.getValueType();
3114 
3115   // fold vector ops
3116   if (VT.isVector()) {
3117     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3118       return FoldedVOp;
3119 
3120     // fold (and x, 0) -> 0, vector edition
3121     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3122       // do not return N0, because undef node may exist in N0
3123       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3124                              SDLoc(N), N0.getValueType());
3125     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3126       // do not return N1, because undef node may exist in N1
3127       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3128                              SDLoc(N), N1.getValueType());
3129 
3130     // fold (and x, -1) -> x, vector edition
3131     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3132       return N1;
3133     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3134       return N0;
3135   }
3136 
3137   // fold (and c1, c2) -> c1&c2
3138   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3139   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3140   if (N0C && N1C && !N1C->isOpaque())
3141     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3142   // canonicalize constant to RHS
3143   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3144      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3145     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3146   // fold (and x, -1) -> x
3147   if (isAllOnesConstant(N1))
3148     return N0;
3149   // if (and x, c) is known to be zero, return 0
3150   unsigned BitWidth = VT.getScalarSizeInBits();
3151   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3152                                    APInt::getAllOnesValue(BitWidth)))
3153     return DAG.getConstant(0, SDLoc(N), VT);
3154   // reassociate and
3155   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3156     return RAND;
3157   // fold (and (or x, C), D) -> D if (C & D) == D
3158   if (N1C && N0.getOpcode() == ISD::OR)
3159     if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
3160       if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
3161         return N1;
3162   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3163   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3164     SDValue N0Op0 = N0.getOperand(0);
3165     APInt Mask = ~N1C->getAPIntValue();
3166     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
3167     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
3168       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
3169                                  N0.getValueType(), N0Op0);
3170 
3171       // Replace uses of the AND with uses of the Zero extend node.
3172       CombineTo(N, Zext);
3173 
3174       // We actually want to replace all uses of the any_extend with the
3175       // zero_extend, to avoid duplicating things.  This will later cause this
3176       // AND to be folded.
3177       CombineTo(N0.getNode(), Zext);
3178       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3179     }
3180   }
3181   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
3182   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
3183   // already be zero by virtue of the width of the base type of the load.
3184   //
3185   // the 'X' node here can either be nothing or an extract_vector_elt to catch
3186   // more cases.
3187   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3188        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
3189        N0.getOperand(0).getOpcode() == ISD::LOAD &&
3190        N0.getOperand(0).getResNo() == 0) ||
3191       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
3192     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
3193                                          N0 : N0.getOperand(0) );
3194 
3195     // Get the constant (if applicable) the zero'th operand is being ANDed with.
3196     // This can be a pure constant or a vector splat, in which case we treat the
3197     // vector as a scalar and use the splat value.
3198     APInt Constant = APInt::getNullValue(1);
3199     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
3200       Constant = C->getAPIntValue();
3201     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
3202       APInt SplatValue, SplatUndef;
3203       unsigned SplatBitSize;
3204       bool HasAnyUndefs;
3205       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
3206                                              SplatBitSize, HasAnyUndefs);
3207       if (IsSplat) {
3208         // Undef bits can contribute to a possible optimisation if set, so
3209         // set them.
3210         SplatValue |= SplatUndef;
3211 
3212         // The splat value may be something like "0x00FFFFFF", which means 0 for
3213         // the first vector value and FF for the rest, repeating. We need a mask
3214         // that will apply equally to all members of the vector, so AND all the
3215         // lanes of the constant together.
3216         EVT VT = Vector->getValueType(0);
3217         unsigned BitWidth = VT.getScalarSizeInBits();
3218 
3219         // If the splat value has been compressed to a bitlength lower
3220         // than the size of the vector lane, we need to re-expand it to
3221         // the lane size.
3222         if (BitWidth > SplatBitSize)
3223           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
3224                SplatBitSize < BitWidth;
3225                SplatBitSize = SplatBitSize * 2)
3226             SplatValue |= SplatValue.shl(SplatBitSize);
3227 
3228         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
3229         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
3230         if (SplatBitSize % BitWidth == 0) {
3231           Constant = APInt::getAllOnesValue(BitWidth);
3232           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
3233             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
3234         }
3235       }
3236     }
3237 
3238     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
3239     // actually legal and isn't going to get expanded, else this is a false
3240     // optimisation.
3241     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
3242                                                     Load->getValueType(0),
3243                                                     Load->getMemoryVT());
3244 
3245     // Resize the constant to the same size as the original memory access before
3246     // extension. If it is still the AllOnesValue then this AND is completely
3247     // unneeded.
3248     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
3249 
3250     bool B;
3251     switch (Load->getExtensionType()) {
3252     default: B = false; break;
3253     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
3254     case ISD::ZEXTLOAD:
3255     case ISD::NON_EXTLOAD: B = true; break;
3256     }
3257 
3258     if (B && Constant.isAllOnesValue()) {
3259       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
3260       // preserve semantics once we get rid of the AND.
3261       SDValue NewLoad(Load, 0);
3262       if (Load->getExtensionType() == ISD::EXTLOAD) {
3263         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
3264                               Load->getValueType(0), SDLoc(Load),
3265                               Load->getChain(), Load->getBasePtr(),
3266                               Load->getOffset(), Load->getMemoryVT(),
3267                               Load->getMemOperand());
3268         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
3269         if (Load->getNumValues() == 3) {
3270           // PRE/POST_INC loads have 3 values.
3271           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
3272                            NewLoad.getValue(2) };
3273           CombineTo(Load, To, 3, true);
3274         } else {
3275           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
3276         }
3277       }
3278 
3279       // Fold the AND away, taking care not to fold to the old load node if we
3280       // replaced it.
3281       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
3282 
3283       return SDValue(N, 0); // Return N so it doesn't get rechecked!
3284     }
3285   }
3286 
3287   // fold (and (load x), 255) -> (zextload x, i8)
3288   // fold (and (extload x, i16), 255) -> (zextload x, i8)
3289   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
3290   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
3291                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
3292                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
3293     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
3294     LoadSDNode *LN0 = HasAnyExt
3295       ? cast<LoadSDNode>(N0.getOperand(0))
3296       : cast<LoadSDNode>(N0);
3297     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
3298         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
3299       auto NarrowLoad = false;
3300       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
3301       EVT ExtVT, LoadedVT;
3302       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
3303                            NarrowLoad)) {
3304         if (!NarrowLoad) {
3305           SDValue NewLoad =
3306             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3307                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
3308                            LN0->getMemOperand());
3309           AddToWorklist(N);
3310           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
3311           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3312         } else {
3313           EVT PtrType = LN0->getOperand(1).getValueType();
3314 
3315           unsigned Alignment = LN0->getAlignment();
3316           SDValue NewPtr = LN0->getBasePtr();
3317 
3318           // For big endian targets, we need to add an offset to the pointer
3319           // to load the correct bytes.  For little endian systems, we merely
3320           // need to read fewer bytes from the same pointer.
3321           if (DAG.getDataLayout().isBigEndian()) {
3322             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3323             unsigned EVTStoreBytes = ExtVT.getStoreSize();
3324             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3325             SDLoc DL(LN0);
3326             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
3327                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
3328             Alignment = MinAlign(Alignment, PtrOff);
3329           }
3330 
3331           AddToWorklist(NewPtr.getNode());
3332 
3333           SDValue Load = DAG.getExtLoad(
3334               ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
3335               LN0->getPointerInfo(), ExtVT, Alignment,
3336               LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
3337           AddToWorklist(N);
3338           CombineTo(LN0, Load, Load.getValue(1));
3339           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3340         }
3341       }
3342     }
3343   }
3344 
3345   if (SDValue Combined = visitANDLike(N0, N1, N))
3346     return Combined;
3347 
3348   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
3349   if (N0.getOpcode() == N1.getOpcode())
3350     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3351       return Tmp;
3352 
3353   // Masking the negated extension of a boolean is just the zero-extended
3354   // boolean:
3355   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
3356   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
3357   //
3358   // Note: the SimplifyDemandedBits fold below can make an information-losing
3359   // transform, and then we have no way to find this better fold.
3360   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
3361     ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
3362     SDValue SubRHS = N0.getOperand(1);
3363     if (SubLHS && SubLHS->isNullValue()) {
3364       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
3365           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3366         return SubRHS;
3367       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
3368           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3369         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
3370     }
3371   }
3372 
3373   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
3374   // fold (and (sra)) -> (and (srl)) when possible.
3375   if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
3376     return SDValue(N, 0);
3377 
3378   // fold (zext_inreg (extload x)) -> (zextload x)
3379   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
3380     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3381     EVT MemVT = LN0->getMemoryVT();
3382     // If we zero all the possible extended bits, then we can turn this into
3383     // a zextload if we are running before legalize or the operation is legal.
3384     unsigned BitWidth = N1.getScalarValueSizeInBits();
3385     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3386                            BitWidth - MemVT.getScalarSizeInBits())) &&
3387         ((!LegalOperations && !LN0->isVolatile()) ||
3388          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3389       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3390                                        LN0->getChain(), LN0->getBasePtr(),
3391                                        MemVT, LN0->getMemOperand());
3392       AddToWorklist(N);
3393       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3394       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3395     }
3396   }
3397   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
3398   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
3399       N0.hasOneUse()) {
3400     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3401     EVT MemVT = LN0->getMemoryVT();
3402     // If we zero all the possible extended bits, then we can turn this into
3403     // a zextload if we are running before legalize or the operation is legal.
3404     unsigned BitWidth = N1.getScalarValueSizeInBits();
3405     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3406                            BitWidth - MemVT.getScalarSizeInBits())) &&
3407         ((!LegalOperations && !LN0->isVolatile()) ||
3408          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3409       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3410                                        LN0->getChain(), LN0->getBasePtr(),
3411                                        MemVT, LN0->getMemOperand());
3412       AddToWorklist(N);
3413       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3414       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3415     }
3416   }
3417   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
3418   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
3419     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
3420                                            N0.getOperand(1), false))
3421       return BSwap;
3422   }
3423 
3424   return SDValue();
3425 }
3426 
3427 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
3428 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
3429                                         bool DemandHighBits) {
3430   if (!LegalOperations)
3431     return SDValue();
3432 
3433   EVT VT = N->getValueType(0);
3434   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
3435     return SDValue();
3436   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3437     return SDValue();
3438 
3439   // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
3440   bool LookPassAnd0 = false;
3441   bool LookPassAnd1 = false;
3442   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
3443       std::swap(N0, N1);
3444   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
3445       std::swap(N0, N1);
3446   if (N0.getOpcode() == ISD::AND) {
3447     if (!N0.getNode()->hasOneUse())
3448       return SDValue();
3449     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3450     if (!N01C || N01C->getZExtValue() != 0xFF00)
3451       return SDValue();
3452     N0 = N0.getOperand(0);
3453     LookPassAnd0 = true;
3454   }
3455 
3456   if (N1.getOpcode() == ISD::AND) {
3457     if (!N1.getNode()->hasOneUse())
3458       return SDValue();
3459     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3460     if (!N11C || N11C->getZExtValue() != 0xFF)
3461       return SDValue();
3462     N1 = N1.getOperand(0);
3463     LookPassAnd1 = true;
3464   }
3465 
3466   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
3467     std::swap(N0, N1);
3468   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
3469     return SDValue();
3470   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
3471     return SDValue();
3472 
3473   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3474   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3475   if (!N01C || !N11C)
3476     return SDValue();
3477   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
3478     return SDValue();
3479 
3480   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
3481   SDValue N00 = N0->getOperand(0);
3482   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
3483     if (!N00.getNode()->hasOneUse())
3484       return SDValue();
3485     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
3486     if (!N001C || N001C->getZExtValue() != 0xFF)
3487       return SDValue();
3488     N00 = N00.getOperand(0);
3489     LookPassAnd0 = true;
3490   }
3491 
3492   SDValue N10 = N1->getOperand(0);
3493   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
3494     if (!N10.getNode()->hasOneUse())
3495       return SDValue();
3496     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
3497     if (!N101C || N101C->getZExtValue() != 0xFF00)
3498       return SDValue();
3499     N10 = N10.getOperand(0);
3500     LookPassAnd1 = true;
3501   }
3502 
3503   if (N00 != N10)
3504     return SDValue();
3505 
3506   // Make sure everything beyond the low halfword gets set to zero since the SRL
3507   // 16 will clear the top bits.
3508   unsigned OpSizeInBits = VT.getSizeInBits();
3509   if (DemandHighBits && OpSizeInBits > 16) {
3510     // If the left-shift isn't masked out then the only way this is a bswap is
3511     // if all bits beyond the low 8 are 0. In that case the entire pattern
3512     // reduces to a left shift anyway: leave it for other parts of the combiner.
3513     if (!LookPassAnd0)
3514       return SDValue();
3515 
3516     // However, if the right shift isn't masked out then it might be because
3517     // it's not needed. See if we can spot that too.
3518     if (!LookPassAnd1 &&
3519         !DAG.MaskedValueIsZero(
3520             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
3521       return SDValue();
3522   }
3523 
3524   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
3525   if (OpSizeInBits > 16) {
3526     SDLoc DL(N);
3527     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
3528                       DAG.getConstant(OpSizeInBits - 16, DL,
3529                                       getShiftAmountTy(VT)));
3530   }
3531   return Res;
3532 }
3533 
3534 /// Return true if the specified node is an element that makes up a 32-bit
3535 /// packed halfword byteswap.
3536 /// ((x & 0x000000ff) << 8) |
3537 /// ((x & 0x0000ff00) >> 8) |
3538 /// ((x & 0x00ff0000) << 8) |
3539 /// ((x & 0xff000000) >> 8)
3540 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
3541   if (!N.getNode()->hasOneUse())
3542     return false;
3543 
3544   unsigned Opc = N.getOpcode();
3545   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
3546     return false;
3547 
3548   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3549   if (!N1C)
3550     return false;
3551 
3552   unsigned Num;
3553   switch (N1C->getZExtValue()) {
3554   default:
3555     return false;
3556   case 0xFF:       Num = 0; break;
3557   case 0xFF00:     Num = 1; break;
3558   case 0xFF0000:   Num = 2; break;
3559   case 0xFF000000: Num = 3; break;
3560   }
3561 
3562   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
3563   SDValue N0 = N.getOperand(0);
3564   if (Opc == ISD::AND) {
3565     if (Num == 0 || Num == 2) {
3566       // (x >> 8) & 0xff
3567       // (x >> 8) & 0xff0000
3568       if (N0.getOpcode() != ISD::SRL)
3569         return false;
3570       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3571       if (!C || C->getZExtValue() != 8)
3572         return false;
3573     } else {
3574       // (x << 8) & 0xff00
3575       // (x << 8) & 0xff000000
3576       if (N0.getOpcode() != ISD::SHL)
3577         return false;
3578       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3579       if (!C || C->getZExtValue() != 8)
3580         return false;
3581     }
3582   } else if (Opc == ISD::SHL) {
3583     // (x & 0xff) << 8
3584     // (x & 0xff0000) << 8
3585     if (Num != 0 && Num != 2)
3586       return false;
3587     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3588     if (!C || C->getZExtValue() != 8)
3589       return false;
3590   } else { // Opc == ISD::SRL
3591     // (x & 0xff00) >> 8
3592     // (x & 0xff000000) >> 8
3593     if (Num != 1 && Num != 3)
3594       return false;
3595     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3596     if (!C || C->getZExtValue() != 8)
3597       return false;
3598   }
3599 
3600   if (Parts[Num])
3601     return false;
3602 
3603   Parts[Num] = N0.getOperand(0).getNode();
3604   return true;
3605 }
3606 
3607 /// Match a 32-bit packed halfword bswap. That is
3608 /// ((x & 0x000000ff) << 8) |
3609 /// ((x & 0x0000ff00) >> 8) |
3610 /// ((x & 0x00ff0000) << 8) |
3611 /// ((x & 0xff000000) >> 8)
3612 /// => (rotl (bswap x), 16)
3613 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
3614   if (!LegalOperations)
3615     return SDValue();
3616 
3617   EVT VT = N->getValueType(0);
3618   if (VT != MVT::i32)
3619     return SDValue();
3620   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3621     return SDValue();
3622 
3623   // Look for either
3624   // (or (or (and), (and)), (or (and), (and)))
3625   // (or (or (or (and), (and)), (and)), (and))
3626   if (N0.getOpcode() != ISD::OR)
3627     return SDValue();
3628   SDValue N00 = N0.getOperand(0);
3629   SDValue N01 = N0.getOperand(1);
3630   SDNode *Parts[4] = {};
3631 
3632   if (N1.getOpcode() == ISD::OR &&
3633       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
3634     // (or (or (and), (and)), (or (and), (and)))
3635     SDValue N000 = N00.getOperand(0);
3636     if (!isBSwapHWordElement(N000, Parts))
3637       return SDValue();
3638 
3639     SDValue N001 = N00.getOperand(1);
3640     if (!isBSwapHWordElement(N001, Parts))
3641       return SDValue();
3642     SDValue N010 = N01.getOperand(0);
3643     if (!isBSwapHWordElement(N010, Parts))
3644       return SDValue();
3645     SDValue N011 = N01.getOperand(1);
3646     if (!isBSwapHWordElement(N011, Parts))
3647       return SDValue();
3648   } else {
3649     // (or (or (or (and), (and)), (and)), (and))
3650     if (!isBSwapHWordElement(N1, Parts))
3651       return SDValue();
3652     if (!isBSwapHWordElement(N01, Parts))
3653       return SDValue();
3654     if (N00.getOpcode() != ISD::OR)
3655       return SDValue();
3656     SDValue N000 = N00.getOperand(0);
3657     if (!isBSwapHWordElement(N000, Parts))
3658       return SDValue();
3659     SDValue N001 = N00.getOperand(1);
3660     if (!isBSwapHWordElement(N001, Parts))
3661       return SDValue();
3662   }
3663 
3664   // Make sure the parts are all coming from the same node.
3665   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
3666     return SDValue();
3667 
3668   SDLoc DL(N);
3669   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
3670                               SDValue(Parts[0], 0));
3671 
3672   // Result of the bswap should be rotated by 16. If it's not legal, then
3673   // do  (x << 16) | (x >> 16).
3674   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
3675   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
3676     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
3677   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
3678     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
3679   return DAG.getNode(ISD::OR, DL, VT,
3680                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
3681                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
3682 }
3683 
3684 /// This contains all DAGCombine rules which reduce two values combined by
3685 /// an Or operation to a single value \see visitANDLike().
3686 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
3687   EVT VT = N1.getValueType();
3688   // fold (or x, undef) -> -1
3689   if (!LegalOperations &&
3690       (N0.isUndef() || N1.isUndef())) {
3691     EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
3692     return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
3693                            SDLoc(LocReference), VT);
3694   }
3695   // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
3696   SDValue LL, LR, RL, RR, CC0, CC1;
3697   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
3698     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
3699     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
3700 
3701     if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
3702       // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
3703       // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
3704       if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
3705         EVT CCVT = getSetCCResultType(LR.getValueType());
3706         if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3707           SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
3708                                        LR.getValueType(), LL, RL);
3709           AddToWorklist(ORNode.getNode());
3710           return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
3711         }
3712       }
3713       // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
3714       // fold (or (setgt X, -1), (setgt Y  -1)) -> (setgt (and X, Y), -1)
3715       if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
3716         EVT CCVT = getSetCCResultType(LR.getValueType());
3717         if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3718           SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
3719                                         LR.getValueType(), LL, RL);
3720           AddToWorklist(ANDNode.getNode());
3721           return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
3722         }
3723       }
3724     }
3725     // canonicalize equivalent to ll == rl
3726     if (LL == RR && LR == RL) {
3727       Op1 = ISD::getSetCCSwappedOperands(Op1);
3728       std::swap(RL, RR);
3729     }
3730     if (LL == RL && LR == RR) {
3731       bool isInteger = LL.getValueType().isInteger();
3732       ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
3733       if (Result != ISD::SETCC_INVALID &&
3734           (!LegalOperations ||
3735            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
3736             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
3737         EVT CCVT = getSetCCResultType(LL.getValueType());
3738         if (N0.getValueType() == CCVT ||
3739             (!LegalOperations && N0.getValueType() == MVT::i1))
3740           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
3741                               LL, LR, Result);
3742       }
3743     }
3744   }
3745 
3746   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
3747   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3748       // Don't increase # computations.
3749       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3750     // We can only do this xform if we know that bits from X that are set in C2
3751     // but not in C1 are already zero.  Likewise for Y.
3752     if (const ConstantSDNode *N0O1C =
3753         getAsNonOpaqueConstant(N0.getOperand(1))) {
3754       if (const ConstantSDNode *N1O1C =
3755           getAsNonOpaqueConstant(N1.getOperand(1))) {
3756         // We can only do this xform if we know that bits from X that are set in
3757         // C2 but not in C1 are already zero.  Likewise for Y.
3758         const APInt &LHSMask = N0O1C->getAPIntValue();
3759         const APInt &RHSMask = N1O1C->getAPIntValue();
3760 
3761         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
3762             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
3763           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3764                                   N0.getOperand(0), N1.getOperand(0));
3765           SDLoc DL(LocReference);
3766           return DAG.getNode(ISD::AND, DL, VT, X,
3767                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
3768         }
3769       }
3770     }
3771   }
3772 
3773   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
3774   if (N0.getOpcode() == ISD::AND &&
3775       N1.getOpcode() == ISD::AND &&
3776       N0.getOperand(0) == N1.getOperand(0) &&
3777       // Don't increase # computations.
3778       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3779     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3780                             N0.getOperand(1), N1.getOperand(1));
3781     return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
3782   }
3783 
3784   return SDValue();
3785 }
3786 
3787 SDValue DAGCombiner::visitOR(SDNode *N) {
3788   SDValue N0 = N->getOperand(0);
3789   SDValue N1 = N->getOperand(1);
3790   EVT VT = N1.getValueType();
3791 
3792   // fold vector ops
3793   if (VT.isVector()) {
3794     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3795       return FoldedVOp;
3796 
3797     // fold (or x, 0) -> x, vector edition
3798     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3799       return N1;
3800     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3801       return N0;
3802 
3803     // fold (or x, -1) -> -1, vector edition
3804     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3805       // do not return N0, because undef node may exist in N0
3806       return DAG.getConstant(
3807           APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N),
3808           N0.getValueType());
3809     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3810       // do not return N1, because undef node may exist in N1
3811       return DAG.getConstant(
3812           APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N),
3813           N1.getValueType());
3814 
3815     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
3816     // Do this only if the resulting shuffle is legal.
3817     if (isa<ShuffleVectorSDNode>(N0) &&
3818         isa<ShuffleVectorSDNode>(N1) &&
3819         // Avoid folding a node with illegal type.
3820         TLI.isTypeLegal(VT)) {
3821       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
3822       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
3823       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
3824       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
3825       // Ensure both shuffles have a zero input.
3826       if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
3827         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
3828         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
3829         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
3830         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
3831         bool CanFold = true;
3832         int NumElts = VT.getVectorNumElements();
3833         SmallVector<int, 4> Mask(NumElts);
3834 
3835         for (int i = 0; i != NumElts; ++i) {
3836           int M0 = SV0->getMaskElt(i);
3837           int M1 = SV1->getMaskElt(i);
3838 
3839           // Determine if either index is pointing to a zero vector.
3840           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
3841           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
3842 
3843           // If one element is zero and the otherside is undef, keep undef.
3844           // This also handles the case that both are undef.
3845           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
3846             Mask[i] = -1;
3847             continue;
3848           }
3849 
3850           // Make sure only one of the elements is zero.
3851           if (M0Zero == M1Zero) {
3852             CanFold = false;
3853             break;
3854           }
3855 
3856           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
3857 
3858           // We have a zero and non-zero element. If the non-zero came from
3859           // SV0 make the index a LHS index. If it came from SV1, make it
3860           // a RHS index. We need to mod by NumElts because we don't care
3861           // which operand it came from in the original shuffles.
3862           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
3863         }
3864 
3865         if (CanFold) {
3866           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
3867           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
3868 
3869           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
3870           if (!LegalMask) {
3871             std::swap(NewLHS, NewRHS);
3872             ShuffleVectorSDNode::commuteMask(Mask);
3873             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
3874           }
3875 
3876           if (LegalMask)
3877             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
3878         }
3879       }
3880     }
3881   }
3882 
3883   // fold (or c1, c2) -> c1|c2
3884   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3885   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3886   if (N0C && N1C && !N1C->isOpaque())
3887     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
3888   // canonicalize constant to RHS
3889   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3890      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3891     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
3892   // fold (or x, 0) -> x
3893   if (isNullConstant(N1))
3894     return N0;
3895   // fold (or x, -1) -> -1
3896   if (isAllOnesConstant(N1))
3897     return N1;
3898   // fold (or x, c) -> c iff (x & ~c) == 0
3899   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
3900     return N1;
3901 
3902   if (SDValue Combined = visitORLike(N0, N1, N))
3903     return Combined;
3904 
3905   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
3906   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
3907     return BSwap;
3908   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
3909     return BSwap;
3910 
3911   // reassociate or
3912   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
3913     return ROR;
3914   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
3915   // iff (c1 & c2) == 0.
3916   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
3917              isa<ConstantSDNode>(N0.getOperand(1))) {
3918     ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
3919     if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
3920       if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
3921                                                    N1C, C1))
3922         return DAG.getNode(
3923             ISD::AND, SDLoc(N), VT,
3924             DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
3925       return SDValue();
3926     }
3927   }
3928   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
3929   if (N0.getOpcode() == N1.getOpcode())
3930     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3931       return Tmp;
3932 
3933   // See if this is some rotate idiom.
3934   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
3935     return SDValue(Rot, 0);
3936 
3937   // Simplify the operands using demanded-bits information.
3938   if (!VT.isVector() &&
3939       SimplifyDemandedBits(SDValue(N, 0)))
3940     return SDValue(N, 0);
3941 
3942   return SDValue();
3943 }
3944 
3945 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
3946 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
3947   if (Op.getOpcode() == ISD::AND) {
3948     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
3949       Mask = Op.getOperand(1);
3950       Op = Op.getOperand(0);
3951     } else {
3952       return false;
3953     }
3954   }
3955 
3956   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
3957     Shift = Op;
3958     return true;
3959   }
3960 
3961   return false;
3962 }
3963 
3964 // Return true if we can prove that, whenever Neg and Pos are both in the
3965 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
3966 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
3967 //
3968 //     (or (shift1 X, Neg), (shift2 X, Pos))
3969 //
3970 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
3971 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
3972 // to consider shift amounts with defined behavior.
3973 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
3974   // If EltSize is a power of 2 then:
3975   //
3976   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
3977   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
3978   //
3979   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
3980   // for the stronger condition:
3981   //
3982   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
3983   //
3984   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
3985   // we can just replace Neg with Neg' for the rest of the function.
3986   //
3987   // In other cases we check for the even stronger condition:
3988   //
3989   //     Neg == EltSize - Pos                                    [B]
3990   //
3991   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
3992   // behavior if Pos == 0 (and consequently Neg == EltSize).
3993   //
3994   // We could actually use [A] whenever EltSize is a power of 2, but the
3995   // only extra cases that it would match are those uninteresting ones
3996   // where Neg and Pos are never in range at the same time.  E.g. for
3997   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
3998   // as well as (sub 32, Pos), but:
3999   //
4000   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4001   //
4002   // always invokes undefined behavior for 32-bit X.
4003   //
4004   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4005   unsigned MaskLoBits = 0;
4006   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4007     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4008       if (NegC->getAPIntValue() == EltSize - 1) {
4009         Neg = Neg.getOperand(0);
4010         MaskLoBits = Log2_64(EltSize);
4011       }
4012     }
4013   }
4014 
4015   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4016   if (Neg.getOpcode() != ISD::SUB)
4017     return false;
4018   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
4019   if (!NegC)
4020     return false;
4021   SDValue NegOp1 = Neg.getOperand(1);
4022 
4023   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4024   // Pos'.  The truncation is redundant for the purpose of the equality.
4025   if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4026     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4027       if (PosC->getAPIntValue() == EltSize - 1)
4028         Pos = Pos.getOperand(0);
4029 
4030   // The condition we need is now:
4031   //
4032   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4033   //
4034   // If NegOp1 == Pos then we need:
4035   //
4036   //              EltSize & Mask == NegC & Mask
4037   //
4038   // (because "x & Mask" is a truncation and distributes through subtraction).
4039   APInt Width;
4040   if (Pos == NegOp1)
4041     Width = NegC->getAPIntValue();
4042 
4043   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4044   // Then the condition we want to prove becomes:
4045   //
4046   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4047   //
4048   // which, again because "x & Mask" is a truncation, becomes:
4049   //
4050   //                NegC & Mask == (EltSize - PosC) & Mask
4051   //             EltSize & Mask == (NegC + PosC) & Mask
4052   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4053     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4054       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4055     else
4056       return false;
4057   } else
4058     return false;
4059 
4060   // Now we just need to check that EltSize & Mask == Width & Mask.
4061   if (MaskLoBits)
4062     // EltSize & Mask is 0 since Mask is EltSize - 1.
4063     return Width.getLoBits(MaskLoBits) == 0;
4064   return Width == EltSize;
4065 }
4066 
4067 // A subroutine of MatchRotate used once we have found an OR of two opposite
4068 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4069 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4070 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4071 // Neg with outer conversions stripped away.
4072 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4073                                        SDValue Neg, SDValue InnerPos,
4074                                        SDValue InnerNeg, unsigned PosOpcode,
4075                                        unsigned NegOpcode, const SDLoc &DL) {
4076   // fold (or (shl x, (*ext y)),
4077   //          (srl x, (*ext (sub 32, y)))) ->
4078   //   (rotl x, y) or (rotr x, (sub 32, y))
4079   //
4080   // fold (or (shl x, (*ext (sub 32, y))),
4081   //          (srl x, (*ext y))) ->
4082   //   (rotr x, y) or (rotl x, (sub 32, y))
4083   EVT VT = Shifted.getValueType();
4084   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4085     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4086     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4087                        HasPos ? Pos : Neg).getNode();
4088   }
4089 
4090   return nullptr;
4091 }
4092 
4093 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
4094 // idioms for rotate, and if the target supports rotation instructions, generate
4095 // a rot[lr].
4096 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
4097   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
4098   EVT VT = LHS.getValueType();
4099   if (!TLI.isTypeLegal(VT)) return nullptr;
4100 
4101   // The target must have at least one rotate flavor.
4102   bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
4103   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
4104   if (!HasROTL && !HasROTR) return nullptr;
4105 
4106   // Match "(X shl/srl V1) & V2" where V2 may not be present.
4107   SDValue LHSShift;   // The shift.
4108   SDValue LHSMask;    // AND value if any.
4109   if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
4110     return nullptr; // Not part of a rotate.
4111 
4112   SDValue RHSShift;   // The shift.
4113   SDValue RHSMask;    // AND value if any.
4114   if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
4115     return nullptr; // Not part of a rotate.
4116 
4117   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
4118     return nullptr;   // Not shifting the same value.
4119 
4120   if (LHSShift.getOpcode() == RHSShift.getOpcode())
4121     return nullptr;   // Shifts must disagree.
4122 
4123   // Canonicalize shl to left side in a shl/srl pair.
4124   if (RHSShift.getOpcode() == ISD::SHL) {
4125     std::swap(LHS, RHS);
4126     std::swap(LHSShift, RHSShift);
4127     std::swap(LHSMask, RHSMask);
4128   }
4129 
4130   unsigned EltSizeInBits = VT.getScalarSizeInBits();
4131   SDValue LHSShiftArg = LHSShift.getOperand(0);
4132   SDValue LHSShiftAmt = LHSShift.getOperand(1);
4133   SDValue RHSShiftArg = RHSShift.getOperand(0);
4134   SDValue RHSShiftAmt = RHSShift.getOperand(1);
4135 
4136   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
4137   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
4138   if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
4139     uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
4140     uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
4141     if ((LShVal + RShVal) != EltSizeInBits)
4142       return nullptr;
4143 
4144     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
4145                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
4146 
4147     // If there is an AND of either shifted operand, apply it to the result.
4148     if (LHSMask.getNode() || RHSMask.getNode()) {
4149       APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
4150       SDValue Mask = DAG.getConstant(AllBits, DL, VT);
4151 
4152       if (LHSMask.getNode()) {
4153         APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
4154         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4155                            DAG.getNode(ISD::OR, DL, VT, LHSMask,
4156                                        DAG.getConstant(RHSBits, DL, VT)));
4157       }
4158       if (RHSMask.getNode()) {
4159         APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
4160         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4161                            DAG.getNode(ISD::OR, DL, VT, RHSMask,
4162                                        DAG.getConstant(LHSBits, DL, VT)));
4163       }
4164 
4165       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
4166     }
4167 
4168     return Rot.getNode();
4169   }
4170 
4171   // If there is a mask here, and we have a variable shift, we can't be sure
4172   // that we're masking out the right stuff.
4173   if (LHSMask.getNode() || RHSMask.getNode())
4174     return nullptr;
4175 
4176   // If the shift amount is sign/zext/any-extended just peel it off.
4177   SDValue LExtOp0 = LHSShiftAmt;
4178   SDValue RExtOp0 = RHSShiftAmt;
4179   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4180        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4181        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4182        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
4183       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4184        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4185        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4186        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
4187     LExtOp0 = LHSShiftAmt.getOperand(0);
4188     RExtOp0 = RHSShiftAmt.getOperand(0);
4189   }
4190 
4191   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
4192                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
4193   if (TryL)
4194     return TryL;
4195 
4196   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
4197                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
4198   if (TryR)
4199     return TryR;
4200 
4201   return nullptr;
4202 }
4203 
4204 SDValue DAGCombiner::visitXOR(SDNode *N) {
4205   SDValue N0 = N->getOperand(0);
4206   SDValue N1 = N->getOperand(1);
4207   EVT VT = N0.getValueType();
4208 
4209   // fold vector ops
4210   if (VT.isVector()) {
4211     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4212       return FoldedVOp;
4213 
4214     // fold (xor x, 0) -> x, vector edition
4215     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4216       return N1;
4217     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4218       return N0;
4219   }
4220 
4221   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
4222   if (N0.isUndef() && N1.isUndef())
4223     return DAG.getConstant(0, SDLoc(N), VT);
4224   // fold (xor x, undef) -> undef
4225   if (N0.isUndef())
4226     return N0;
4227   if (N1.isUndef())
4228     return N1;
4229   // fold (xor c1, c2) -> c1^c2
4230   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4231   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4232   if (N0C && N1C)
4233     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
4234   // canonicalize constant to RHS
4235   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4236      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4237     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
4238   // fold (xor x, 0) -> x
4239   if (isNullConstant(N1))
4240     return N0;
4241   // reassociate xor
4242   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
4243     return RXOR;
4244 
4245   // fold !(x cc y) -> (x !cc y)
4246   SDValue LHS, RHS, CC;
4247   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
4248     bool isInt = LHS.getValueType().isInteger();
4249     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
4250                                                isInt);
4251 
4252     if (!LegalOperations ||
4253         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
4254       switch (N0.getOpcode()) {
4255       default:
4256         llvm_unreachable("Unhandled SetCC Equivalent!");
4257       case ISD::SETCC:
4258         return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
4259       case ISD::SELECT_CC:
4260         return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
4261                                N0.getOperand(3), NotCC);
4262       }
4263     }
4264   }
4265 
4266   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
4267   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
4268       N0.getNode()->hasOneUse() &&
4269       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
4270     SDValue V = N0.getOperand(0);
4271     SDLoc DL(N0);
4272     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
4273                     DAG.getConstant(1, DL, V.getValueType()));
4274     AddToWorklist(V.getNode());
4275     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
4276   }
4277 
4278   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
4279   if (isOneConstant(N1) && VT == MVT::i1 &&
4280       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
4281     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4282     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
4283       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
4284       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
4285       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
4286       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
4287       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
4288     }
4289   }
4290   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
4291   if (isAllOnesConstant(N1) &&
4292       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
4293     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4294     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
4295       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
4296       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
4297       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
4298       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
4299       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
4300     }
4301   }
4302   // fold (xor (and x, y), y) -> (and (not x), y)
4303   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
4304       N0->getOperand(1) == N1) {
4305     SDValue X = N0->getOperand(0);
4306     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
4307     AddToWorklist(NotX.getNode());
4308     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
4309   }
4310   // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
4311   if (N1C && N0.getOpcode() == ISD::XOR) {
4312     if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
4313       SDLoc DL(N);
4314       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
4315                          DAG.getConstant(N1C->getAPIntValue() ^
4316                                          N00C->getAPIntValue(), DL, VT));
4317     }
4318     if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
4319       SDLoc DL(N);
4320       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
4321                          DAG.getConstant(N1C->getAPIntValue() ^
4322                                          N01C->getAPIntValue(), DL, VT));
4323     }
4324   }
4325   // fold (xor x, x) -> 0
4326   if (N0 == N1)
4327     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
4328 
4329   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
4330   // Here is a concrete example of this equivalence:
4331   // i16   x ==  14
4332   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
4333   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
4334   //
4335   // =>
4336   //
4337   // i16     ~1      == 0b1111111111111110
4338   // i16 rol(~1, 14) == 0b1011111111111111
4339   //
4340   // Some additional tips to help conceptualize this transform:
4341   // - Try to see the operation as placing a single zero in a value of all ones.
4342   // - There exists no value for x which would allow the result to contain zero.
4343   // - Values of x larger than the bitwidth are undefined and do not require a
4344   //   consistent result.
4345   // - Pushing the zero left requires shifting one bits in from the right.
4346   // A rotate left of ~1 is a nice way of achieving the desired result.
4347   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
4348       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
4349     SDLoc DL(N);
4350     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
4351                        N0.getOperand(1));
4352   }
4353 
4354   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
4355   if (N0.getOpcode() == N1.getOpcode())
4356     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4357       return Tmp;
4358 
4359   // Simplify the expression using non-local knowledge.
4360   if (!VT.isVector() &&
4361       SimplifyDemandedBits(SDValue(N, 0)))
4362     return SDValue(N, 0);
4363 
4364   return SDValue();
4365 }
4366 
4367 /// Handle transforms common to the three shifts, when the shift amount is a
4368 /// constant.
4369 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
4370   SDNode *LHS = N->getOperand(0).getNode();
4371   if (!LHS->hasOneUse()) return SDValue();
4372 
4373   // We want to pull some binops through shifts, so that we have (and (shift))
4374   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
4375   // thing happens with address calculations, so it's important to canonicalize
4376   // it.
4377   bool HighBitSet = false;  // Can we transform this if the high bit is set?
4378 
4379   switch (LHS->getOpcode()) {
4380   default: return SDValue();
4381   case ISD::OR:
4382   case ISD::XOR:
4383     HighBitSet = false; // We can only transform sra if the high bit is clear.
4384     break;
4385   case ISD::AND:
4386     HighBitSet = true;  // We can only transform sra if the high bit is set.
4387     break;
4388   case ISD::ADD:
4389     if (N->getOpcode() != ISD::SHL)
4390       return SDValue(); // only shl(add) not sr[al](add).
4391     HighBitSet = false; // We can only transform sra if the high bit is clear.
4392     break;
4393   }
4394 
4395   // We require the RHS of the binop to be a constant and not opaque as well.
4396   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
4397   if (!BinOpCst) return SDValue();
4398 
4399   // FIXME: disable this unless the input to the binop is a shift by a constant.
4400   // If it is not a shift, it pessimizes some common cases like:
4401   //
4402   //    void foo(int *X, int i) { X[i & 1235] = 1; }
4403   //    int bar(int *X, int i) { return X[i & 255]; }
4404   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
4405   if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
4406        BinOpLHSVal->getOpcode() != ISD::SRA &&
4407        BinOpLHSVal->getOpcode() != ISD::SRL) ||
4408       !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
4409     return SDValue();
4410 
4411   EVT VT = N->getValueType(0);
4412 
4413   // If this is a signed shift right, and the high bit is modified by the
4414   // logical operation, do not perform the transformation. The highBitSet
4415   // boolean indicates the value of the high bit of the constant which would
4416   // cause it to be modified for this operation.
4417   if (N->getOpcode() == ISD::SRA) {
4418     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
4419     if (BinOpRHSSignSet != HighBitSet)
4420       return SDValue();
4421   }
4422 
4423   if (!TLI.isDesirableToCommuteWithShift(LHS))
4424     return SDValue();
4425 
4426   // Fold the constants, shifting the binop RHS by the shift amount.
4427   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
4428                                N->getValueType(0),
4429                                LHS->getOperand(1), N->getOperand(1));
4430   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
4431 
4432   // Create the new shift.
4433   SDValue NewShift = DAG.getNode(N->getOpcode(),
4434                                  SDLoc(LHS->getOperand(0)),
4435                                  VT, LHS->getOperand(0), N->getOperand(1));
4436 
4437   // Create the new binop.
4438   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
4439 }
4440 
4441 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
4442   assert(N->getOpcode() == ISD::TRUNCATE);
4443   assert(N->getOperand(0).getOpcode() == ISD::AND);
4444 
4445   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
4446   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
4447     SDValue N01 = N->getOperand(0).getOperand(1);
4448     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
4449       SDLoc DL(N);
4450       EVT TruncVT = N->getValueType(0);
4451       SDValue N00 = N->getOperand(0).getOperand(0);
4452       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
4453       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
4454       AddToWorklist(Trunc00.getNode());
4455       AddToWorklist(Trunc01.getNode());
4456       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
4457     }
4458   }
4459 
4460   return SDValue();
4461 }
4462 
4463 SDValue DAGCombiner::visitRotate(SDNode *N) {
4464   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
4465   if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
4466       N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
4467     if (SDValue NewOp1 =
4468             distributeTruncateThroughAnd(N->getOperand(1).getNode()))
4469       return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
4470                          N->getOperand(0), NewOp1);
4471   }
4472   return SDValue();
4473 }
4474 
/// Combine an ISD::SHL node.
///
/// Applies the folds below in order and returns on the first one that fires.
/// The order matters: earlier folds take priority over later ones.
///
/// \param N The SHL node; operand 0 is the value, operand 1 the shift amount.
/// \returns The replacement value; SDValue(N, 0) when SimplifyDemandedBits
///          returned true for N; or an empty SDValue if no fold applied.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  // N1C starts out as the scalar constant shift amount (null otherwise); for
  // vectors it may be replaced by the result of isConstOrConstSplat below.
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
        // NOTE(review): when N0 is an AND but the fold above does not fire,
        // N1C stays null, so the N1C-guarded folds below are skipped for this
        // vector shift -- confirm this is intended.
      } else {
        N1C = isConstOrConstSplat(N1);
      }
    }
  }

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  // With a constant shift amount, try to simplify based on demanded bits;
  // returning N itself signals that the node was updated in place.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      SDLoc DL(N);
      APInt c1 = N0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      // Bring both amounts to a common width with one extra bit so that the
      // addition below cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      APInt Sum = c1 + c2;
      if (Sum.uge(OpSizeInBits))
        return DAG.getConstant(0, DL, VT);

      return DAG.getNode(
          ISD::SHL, DL, VT, N0.getOperand(0),
          DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // OpSizeInBits - InnerShiftSize is the number of bits added by the ext.
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N0);
        APInt Sum = c1 + c2;
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          // The new shl is built in the narrow type, so its amount uses the
          // inner srl's shift-amount type.
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  // (In the second case the code below emits a shift amount of C1 - C2.)
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
  //                               (and (srl x, (sub c1, c2)), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Start from the high (OpSizeInBits - c1) bits, then move the mask by
        // the same net amount and direction as the replacement shift.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask = Mask.shl(c2 - c1);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask = Mask.lshr(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    unsigned BitSize = VT.getScalarSizeInBits();
    SDLoc DL(N);
    SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl.getNode());
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  // Finally, try the transforms shared by all three shifts.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}
4695 
/// Combine an ISD::SRA (arithmetic shift right) node.
///
/// Applies the folds below in order and returns on the first one that fires.
///
/// \param N The SRA node; operand 0 is the value, operand 1 the shift amount.
/// \returns The replacement value; SDValue(N, 0) when SimplifyDemandedBits
///          returned true for N; or an empty SDValue if no fold applied.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1 >>s c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (sra -1, x) -> -1
  if (isAllOnesConstant(N0))
    return N0;
  // fold (sra x, c >= size(x)) -> undef
  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    // Number of low bits that survive the shl/sra pair; the checks above
    // guarantee 0 < c1 < OpSizeInBits here.
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      SDLoc DL(N);
      APInt c1 = N0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      // Bring both amounts to a common width with one extra bit so that the
      // addition below cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      // Clamp the combined amount to OpSizeInBits - 1, the largest in-range
      // shift amount.
      APInt Sum = c1 + c2;
      if (Sum.uge(OpSizeInBits))
        Sum = APInt(OpSizeInBits, OpSizeInBits - 1);

      return DAG.getNode(
          ISD::SRA, DL, VT, N0.getOperand(0),
          DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(srl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts: N01C = m (the shl amount) and
    // N1C = result_size - n (the sra amount).
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        // Logical right shift by the residual amount, then truncate and
        // sign-extend back to the result type.
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  // The inner shift may be either an SRL or an SRA; both it and its shift
  // amount operand must be single-use.
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      // The inner shift must discard exactly the bits the truncate removes.
      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Finally, try the transforms shared by all three shifts.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
4849 
4850 SDValue DAGCombiner::visitSRL(SDNode *N) {
4851   SDValue N0 = N->getOperand(0);
4852   SDValue N1 = N->getOperand(1);
4853   EVT VT = N0.getValueType();
4854   unsigned OpSizeInBits = VT.getScalarSizeInBits();
4855 
4856   // fold vector ops
4857   if (VT.isVector())
4858     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4859       return FoldedVOp;
4860 
4861   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4862 
4863   // fold (srl c1, c2) -> c1 >>u c2
4864   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4865   if (N0C && N1C && !N1C->isOpaque())
4866     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
4867   // fold (srl 0, x) -> 0
4868   if (isNullConstant(N0))
4869     return N0;
4870   // fold (srl x, c >= size(x)) -> undef
4871   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
4872     return DAG.getUNDEF(VT);
4873   // fold (srl x, 0) -> x
4874   if (N1C && N1C->isNullValue())
4875     return N0;
4876   // if (srl x, c) is known to be zero, return 0
4877   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4878                                    APInt::getAllOnesValue(OpSizeInBits)))
4879     return DAG.getConstant(0, SDLoc(N), VT);
4880 
4881   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
4882   if (N1C && N0.getOpcode() == ISD::SRL) {
4883     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4884       SDLoc DL(N);
4885       APInt c1 = N0C1->getAPIntValue();
4886       APInt c2 = N1C->getAPIntValue();
4887       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
4888 
4889       APInt Sum = c1 + c2;
4890       if (Sum.uge(OpSizeInBits))
4891         return DAG.getConstant(0, DL, VT);
4892 
4893       return DAG.getNode(
4894           ISD::SRL, DL, VT, N0.getOperand(0),
4895           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
4896     }
4897   }
4898 
4899   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
4900   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
4901       N0.getOperand(0).getOpcode() == ISD::SRL &&
4902       isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
4903     uint64_t c1 =
4904       cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
4905     uint64_t c2 = N1C->getZExtValue();
4906     EVT InnerShiftVT = N0.getOperand(0).getValueType();
4907     EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
4908     uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
4909     // This is only valid if the OpSizeInBits + c1 = size of inner shift.
4910     if (c1 + OpSizeInBits == InnerShiftSize) {
4911       SDLoc DL(N0);
4912       if (c1 + c2 >= InnerShiftSize)
4913         return DAG.getConstant(0, DL, VT);
4914       return DAG.getNode(ISD::TRUNCATE, DL, VT,
4915                          DAG.getNode(ISD::SRL, DL, InnerShiftVT,
4916                                      N0.getOperand(0)->getOperand(0),
4917                                      DAG.getConstant(c1 + c2, DL,
4918                                                      ShiftCountVT)));
4919     }
4920   }
4921 
4922   // fold (srl (shl x, c), c) -> (and x, cst2)
4923   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
4924       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
4925     SDLoc DL(N);
4926     APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
4927     SDValue Mask =
4928         DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1);
4929     AddToWorklist(Mask.getNode());
4930     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
4931   }
4932 
4933   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
4934   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4935     // Shifting in all undef bits?
4936     EVT SmallVT = N0.getOperand(0).getValueType();
4937     unsigned BitSize = SmallVT.getScalarSizeInBits();
4938     if (N1C->getZExtValue() >= BitSize)
4939       return DAG.getUNDEF(VT);
4940 
4941     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
4942       uint64_t ShiftAmt = N1C->getZExtValue();
4943       SDLoc DL0(N0);
4944       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
4945                                        N0.getOperand(0),
4946                           DAG.getConstant(ShiftAmt, DL0,
4947                                           getShiftAmountTy(SmallVT)));
4948       AddToWorklist(SmallShift.getNode());
4949       APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
4950       SDLoc DL(N);
4951       return DAG.getNode(ISD::AND, DL, VT,
4952                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
4953                          DAG.getConstant(Mask, DL, VT));
4954     }
4955   }
4956 
4957   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
4958   // bit, which is unmodified by sra.
4959   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
4960     if (N0.getOpcode() == ISD::SRA)
4961       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
4962   }
4963 
4964   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
4965   if (N1C && N0.getOpcode() == ISD::CTLZ &&
4966       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
4967     APInt KnownZero, KnownOne;
4968     DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
4969 
4970     // If any of the input bits are KnownOne, then the input couldn't be all
4971     // zeros, thus the result of the srl will always be zero.
4972     if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
4973 
4974     // If all of the bits input the to ctlz node are known to be zero, then
4975     // the result of the ctlz is "32" and the result of the shift is one.
4976     APInt UnknownBits = ~KnownZero;
4977     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
4978 
4979     // Otherwise, check to see if there is exactly one bit input to the ctlz.
4980     if ((UnknownBits & (UnknownBits - 1)) == 0) {
4981       // Okay, we know that only that the single bit specified by UnknownBits
4982       // could be set on input to the CTLZ node. If this bit is set, the SRL
4983       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
4984       // to an SRL/XOR pair, which is likely to simplify more.
4985       unsigned ShAmt = UnknownBits.countTrailingZeros();
4986       SDValue Op = N0.getOperand(0);
4987 
4988       if (ShAmt) {
4989         SDLoc DL(N0);
4990         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4991                   DAG.getConstant(ShAmt, DL,
4992                                   getShiftAmountTy(Op.getValueType())));
4993         AddToWorklist(Op.getNode());
4994       }
4995 
4996       SDLoc DL(N);
4997       return DAG.getNode(ISD::XOR, DL, VT,
4998                          Op, DAG.getConstant(1, DL, VT));
4999     }
5000   }
5001 
5002   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
5003   if (N1.getOpcode() == ISD::TRUNCATE &&
5004       N1.getOperand(0).getOpcode() == ISD::AND) {
5005     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5006       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
5007   }
5008 
5009   // fold operands of srl based on knowledge that the low bits are not
5010   // demanded.
5011   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5012     return SDValue(N, 0);
5013 
5014   if (N1C && !N1C->isOpaque())
5015     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
5016       return NewSRL;
5017 
5018   // Attempt to convert a srl of a load into a narrower zero-extending load.
5019   if (SDValue NarrowLoad = ReduceLoadWidth(N))
5020     return NarrowLoad;
5021 
5022   // Here is a common situation. We want to optimize:
5023   //
5024   //   %a = ...
5025   //   %b = and i32 %a, 2
5026   //   %c = srl i32 %b, 1
5027   //   brcond i32 %c ...
5028   //
5029   // into
5030   //
5031   //   %a = ...
5032   //   %b = and %a, 2
5033   //   %c = setcc eq %b, 0
5034   //   brcond %c ...
5035   //
5036   // However when after the source operand of SRL is optimized into AND, the SRL
5037   // itself may not be optimized further. Look for it and add the BRCOND into
5038   // the worklist.
5039   if (N->hasOneUse()) {
5040     SDNode *Use = *N->use_begin();
5041     if (Use->getOpcode() == ISD::BRCOND)
5042       AddToWorklist(Use);
5043     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
5044       // Also look pass the truncate.
5045       Use = *Use->use_begin();
5046       if (Use->getOpcode() == ISD::BRCOND)
5047         AddToWorklist(Use);
5048     }
5049   }
5050 
5051   return SDValue();
5052 }
5053 
5054 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
5055   SDValue N0 = N->getOperand(0);
5056   EVT VT = N->getValueType(0);
5057 
5058   // fold (bswap c1) -> c2
5059   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5060     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
5061   // fold (bswap (bswap x)) -> x
5062   if (N0.getOpcode() == ISD::BSWAP)
5063     return N0->getOperand(0);
5064   return SDValue();
5065 }
5066 
5067 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
5068   SDValue N0 = N->getOperand(0);
5069 
5070   // fold (bitreverse (bitreverse x)) -> x
5071   if (N0.getOpcode() == ISD::BITREVERSE)
5072     return N0.getOperand(0);
5073   return SDValue();
5074 }
5075 
5076 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
5077   SDValue N0 = N->getOperand(0);
5078   EVT VT = N->getValueType(0);
5079 
5080   // fold (ctlz c1) -> c2
5081   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5082     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
5083   return SDValue();
5084 }
5085 
5086 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
5087   SDValue N0 = N->getOperand(0);
5088   EVT VT = N->getValueType(0);
5089 
5090   // fold (ctlz_zero_undef c1) -> c2
5091   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5092     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5093   return SDValue();
5094 }
5095 
5096 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
5097   SDValue N0 = N->getOperand(0);
5098   EVT VT = N->getValueType(0);
5099 
5100   // fold (cttz c1) -> c2
5101   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5102     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
5103   return SDValue();
5104 }
5105 
5106 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
5107   SDValue N0 = N->getOperand(0);
5108   EVT VT = N->getValueType(0);
5109 
5110   // fold (cttz_zero_undef c1) -> c2
5111   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5112     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5113   return SDValue();
5114 }
5115 
5116 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
5117   SDValue N0 = N->getOperand(0);
5118   EVT VT = N->getValueType(0);
5119 
5120   // fold (ctpop c1) -> c2
5121   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5122     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
5123   return SDValue();
5124 }
5125 
5126 
5127 /// \brief Generate Min/Max node
5128 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
5129                                    SDValue RHS, SDValue True, SDValue False,
5130                                    ISD::CondCode CC, const TargetLowering &TLI,
5131                                    SelectionDAG &DAG) {
5132   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
5133     return SDValue();
5134 
5135   switch (CC) {
5136   case ISD::SETOLT:
5137   case ISD::SETOLE:
5138   case ISD::SETLT:
5139   case ISD::SETLE:
5140   case ISD::SETULT:
5141   case ISD::SETULE: {
5142     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
5143     if (TLI.isOperationLegal(Opcode, VT))
5144       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5145     return SDValue();
5146   }
5147   case ISD::SETOGT:
5148   case ISD::SETOGE:
5149   case ISD::SETGT:
5150   case ISD::SETGE:
5151   case ISD::SETUGT:
5152   case ISD::SETUGE: {
5153     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
5154     if (TLI.isOperationLegal(Opcode, VT))
5155       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5156     return SDValue();
5157   }
5158   default:
5159     return SDValue();
5160   }
5161 }
5162 
5163 // TODO: We should handle other cases of selecting between {-1,0,1} here.
5164 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
5165   SDValue Cond = N->getOperand(0);
5166   SDValue N1 = N->getOperand(1);
5167   SDValue N2 = N->getOperand(2);
5168   EVT VT = N->getValueType(0);
5169   EVT CondVT = Cond.getValueType();
5170   SDLoc DL(N);
5171 
5172   // fold (select Cond, 0, 1) -> (xor Cond, 1)
5173   // We can't do this reliably if integer based booleans have different contents
5174   // to floating point based booleans. This is because we can't tell whether we
5175   // have an integer-based boolean or a floating-point-based boolean unless we
5176   // can find the SETCC that produced it and inspect its operands. This is
5177   // fairly easy if C is the SETCC node, but it can potentially be
5178   // undiscoverable (or not reasonably discoverable). For example, it could be
5179   // in another basic block or it could require searching a complicated
5180   // expression.
5181   if (VT.isInteger() &&
5182       (CondVT == MVT::i1 || (CondVT.isInteger() &&
5183                              TLI.getBooleanContents(false, true) ==
5184                                  TargetLowering::ZeroOrOneBooleanContent &&
5185                              TLI.getBooleanContents(false, false) ==
5186                                  TargetLowering::ZeroOrOneBooleanContent)) &&
5187       isNullConstant(N1) && isOneConstant(N2)) {
5188     SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
5189                                   DAG.getConstant(1, DL, CondVT));
5190     if (VT.bitsEq(CondVT))
5191       return NotCond;
5192     return DAG.getZExtOrTrunc(NotCond, DL, VT);
5193   }
5194 
5195   return SDValue();
5196 }
5197 
5198 SDValue DAGCombiner::visitSELECT(SDNode *N) {
5199   SDValue N0 = N->getOperand(0);
5200   SDValue N1 = N->getOperand(1);
5201   SDValue N2 = N->getOperand(2);
5202   EVT VT = N->getValueType(0);
5203   EVT VT0 = N0.getValueType();
5204 
5205   // fold (select C, X, X) -> X
5206   if (N1 == N2)
5207     return N1;
5208   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
5209     // fold (select true, X, Y) -> X
5210     // fold (select false, X, Y) -> Y
5211     return !N0C->isNullValue() ? N1 : N2;
5212   }
5213   // fold (select C, 1, X) -> (or C, X)
5214   if (VT == MVT::i1 && isOneConstant(N1))
5215     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
5216 
5217   if (SDValue V = foldSelectOfConstants(N))
5218     return V;
5219 
5220   // fold (select C, 0, X) -> (and (not C), X)
5221   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
5222     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
5223     AddToWorklist(NOTNode.getNode());
5224     return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
5225   }
5226   // fold (select C, X, 1) -> (or (not C), X)
5227   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
5228     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
5229     AddToWorklist(NOTNode.getNode());
5230     return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
5231   }
5232   // fold (select C, X, 0) -> (and C, X)
5233   if (VT == MVT::i1 && isNullConstant(N2))
5234     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
5235   // fold (select X, X, Y) -> (or X, Y)
5236   // fold (select X, 1, Y) -> (or X, Y)
5237   if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
5238     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
5239   // fold (select X, Y, X) -> (and X, Y)
5240   // fold (select X, Y, 0) -> (and X, Y)
5241   if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
5242     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
5243 
5244   // If we can fold this based on the true/false value, do so.
5245   if (SimplifySelectOps(N, N1, N2))
5246     return SDValue(N, 0);  // Don't revisit N.
5247 
5248   if (VT0 == MVT::i1) {
5249     // The code in this block deals with the following 2 equivalences:
5250     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
5251     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
5252     // The target can specify its prefered form with the
5253     // shouldNormalizeToSelectSequence() callback. However we always transform
5254     // to the right anyway if we find the inner select exists in the DAG anyway
5255     // and we always transform to the left side if we know that we can further
5256     // optimize the combination of the conditions.
5257     bool normalizeToSequence
5258       = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
5259     // select (and Cond0, Cond1), X, Y
5260     //   -> select Cond0, (select Cond1, X, Y), Y
5261     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
5262       SDValue Cond0 = N0->getOperand(0);
5263       SDValue Cond1 = N0->getOperand(1);
5264       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
5265                                         N1.getValueType(), Cond1, N1, N2);
5266       if (normalizeToSequence || !InnerSelect.use_empty())
5267         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
5268                            InnerSelect, N2);
5269     }
5270     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
5271     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
5272       SDValue Cond0 = N0->getOperand(0);
5273       SDValue Cond1 = N0->getOperand(1);
5274       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
5275                                         N1.getValueType(), Cond1, N1, N2);
5276       if (normalizeToSequence || !InnerSelect.use_empty())
5277         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
5278                            InnerSelect);
5279     }
5280 
5281     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
5282     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
5283       SDValue N1_0 = N1->getOperand(0);
5284       SDValue N1_1 = N1->getOperand(1);
5285       SDValue N1_2 = N1->getOperand(2);
5286       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
5287         // Create the actual and node if we can generate good code for it.
5288         if (!normalizeToSequence) {
5289           SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
5290                                     N0, N1_0);
5291           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
5292                              N1_1, N2);
5293         }
5294         // Otherwise see if we can optimize the "and" to a better pattern.
5295         if (SDValue Combined = visitANDLike(N0, N1_0, N))
5296           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
5297                              N1_1, N2);
5298       }
5299     }
5300     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
5301     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
5302       SDValue N2_0 = N2->getOperand(0);
5303       SDValue N2_1 = N2->getOperand(1);
5304       SDValue N2_2 = N2->getOperand(2);
5305       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
5306         // Create the actual or node if we can generate good code for it.
5307         if (!normalizeToSequence) {
5308           SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
5309                                    N0, N2_0);
5310           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
5311                              N1, N2_2);
5312         }
5313         // Otherwise see if we can optimize to a better pattern.
5314         if (SDValue Combined = visitORLike(N0, N2_0, N))
5315           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
5316                              N1, N2_2);
5317       }
5318     }
5319   }
5320 
5321   // select (xor Cond, 1), X, Y -> select Cond, Y, X
5322   // select (xor Cond, 0), X, Y -> selext Cond, X, Y
5323   if (VT0 == MVT::i1) {
5324     if (N0->getOpcode() == ISD::XOR) {
5325       if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
5326         SDValue Cond0 = N0->getOperand(0);
5327         if (C->isOne())
5328           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
5329                              Cond0, N2, N1);
5330         else
5331           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
5332                              Cond0, N1, N2);
5333       }
5334     }
5335   }
5336 
5337   // fold selects based on a setcc into other things, such as min/max/abs
5338   if (N0.getOpcode() == ISD::SETCC) {
5339     // select x, y (fcmp lt x, y) -> fminnum x, y
5340     // select x, y (fcmp gt x, y) -> fmaxnum x, y
5341     //
5342     // This is OK if we don't care about what happens if either operand is a
5343     // NaN.
5344     //
5345 
5346     // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
5347     // no signed zeros as well as no nans.
5348     const TargetOptions &Options = DAG.getTarget().Options;
5349     if (Options.UnsafeFPMath &&
5350         VT.isFloatingPoint() && N0.hasOneUse() &&
5351         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
5352       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5353 
5354       if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
5355                                                 N0.getOperand(1), N1, N2, CC,
5356                                                 TLI, DAG))
5357         return FMinMax;
5358     }
5359 
5360     if ((!LegalOperations &&
5361          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
5362         TLI.isOperationLegal(ISD::SELECT_CC, VT))
5363       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
5364                          N0.getOperand(0), N0.getOperand(1),
5365                          N1, N2, N0.getOperand(2));
5366     return SimplifySelect(SDLoc(N), N0, N1, N2);
5367   }
5368 
5369   return SDValue();
5370 }
5371 
5372 static
5373 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
5374   SDLoc DL(N);
5375   EVT LoVT, HiVT;
5376   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
5377 
5378   // Split the inputs.
5379   SDValue Lo, Hi, LL, LH, RL, RH;
5380   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
5381   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
5382 
5383   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
5384   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
5385 
5386   return std::make_pair(Lo, Hi);
5387 }
5388 
5389 // This function assumes all the vselect's arguments are CONCAT_VECTOR
5390 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
5391 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
5392   SDLoc DL(N);
5393   SDValue Cond = N->getOperand(0);
5394   SDValue LHS = N->getOperand(1);
5395   SDValue RHS = N->getOperand(2);
5396   EVT VT = N->getValueType(0);
5397   int NumElems = VT.getVectorNumElements();
5398   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
5399          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
5400          Cond.getOpcode() == ISD::BUILD_VECTOR);
5401 
5402   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
5403   // binary ones here.
5404   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
5405     return SDValue();
5406 
5407   // We're sure we have an even number of elements due to the
5408   // concat_vectors we have as arguments to vselect.
5409   // Skip BV elements until we find one that's not an UNDEF
5410   // After we find an UNDEF element, keep looping until we get to half the
5411   // length of the BV and see if all the non-undef nodes are the same.
5412   ConstantSDNode *BottomHalf = nullptr;
5413   for (int i = 0; i < NumElems / 2; ++i) {
5414     if (Cond->getOperand(i)->isUndef())
5415       continue;
5416 
5417     if (BottomHalf == nullptr)
5418       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
5419     else if (Cond->getOperand(i).getNode() != BottomHalf)
5420       return SDValue();
5421   }
5422 
5423   // Do the same for the second half of the BuildVector
5424   ConstantSDNode *TopHalf = nullptr;
5425   for (int i = NumElems / 2; i < NumElems; ++i) {
5426     if (Cond->getOperand(i)->isUndef())
5427       continue;
5428 
5429     if (TopHalf == nullptr)
5430       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
5431     else if (Cond->getOperand(i).getNode() != TopHalf)
5432       return SDValue();
5433   }
5434 
5435   assert(TopHalf && BottomHalf &&
5436          "One half of the selector was all UNDEFs and the other was all the "
5437          "same value. This should have been addressed before this function.");
5438   return DAG.getNode(
5439       ISD::CONCAT_VECTORS, DL, VT,
5440       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
5441       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
5442 }
5443 
5444 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
5445 
5446   if (Level >= AfterLegalizeTypes)
5447     return SDValue();
5448 
5449   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
5450   SDValue Mask = MSC->getMask();
5451   SDValue Data  = MSC->getValue();
5452   SDLoc DL(N);
5453 
5454   // If the MSCATTER data type requires splitting and the mask is provided by a
5455   // SETCC, then split both nodes and its operands before legalization. This
5456   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5457   // and enables future optimizations (e.g. min/max pattern matching on X86).
5458   if (Mask.getOpcode() != ISD::SETCC)
5459     return SDValue();
5460 
5461   // Check if any splitting is required.
5462   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
5463       TargetLowering::TypeSplitVector)
5464     return SDValue();
5465   SDValue MaskLo, MaskHi, Lo, Hi;
5466   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5467 
5468   EVT LoVT, HiVT;
5469   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
5470 
5471   SDValue Chain = MSC->getChain();
5472 
5473   EVT MemoryVT = MSC->getMemoryVT();
5474   unsigned Alignment = MSC->getOriginalAlignment();
5475 
5476   EVT LoMemVT, HiMemVT;
5477   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5478 
5479   SDValue DataLo, DataHi;
5480   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
5481 
5482   SDValue BasePtr = MSC->getBasePtr();
5483   SDValue IndexLo, IndexHi;
5484   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
5485 
5486   MachineMemOperand *MMO = DAG.getMachineFunction().
5487     getMachineMemOperand(MSC->getPointerInfo(),
5488                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
5489                           Alignment, MSC->getAAInfo(), MSC->getRanges());
5490 
5491   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
5492   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
5493                             DL, OpsLo, MMO);
5494 
5495   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
5496   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
5497                             DL, OpsHi, MMO);
5498 
5499   AddToWorklist(Lo.getNode());
5500   AddToWorklist(Hi.getNode());
5501 
5502   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
5503 }
5504 
5505 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
5506 
5507   if (Level >= AfterLegalizeTypes)
5508     return SDValue();
5509 
5510   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
5511   SDValue Mask = MST->getMask();
5512   SDValue Data  = MST->getValue();
5513   SDLoc DL(N);
5514 
5515   // If the MSTORE data type requires splitting and the mask is provided by a
5516   // SETCC, then split both nodes and its operands before legalization. This
5517   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5518   // and enables future optimizations (e.g. min/max pattern matching on X86).
5519   if (Mask.getOpcode() == ISD::SETCC) {
5520 
5521     // Check if any splitting is required.
5522     if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
5523         TargetLowering::TypeSplitVector)
5524       return SDValue();
5525 
5526     SDValue MaskLo, MaskHi, Lo, Hi;
5527     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5528 
5529     EVT LoVT, HiVT;
5530     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
5531 
5532     SDValue Chain = MST->getChain();
5533     SDValue Ptr   = MST->getBasePtr();
5534 
5535     EVT MemoryVT = MST->getMemoryVT();
5536     unsigned Alignment = MST->getOriginalAlignment();
5537 
5538     // if Alignment is equal to the vector size,
5539     // take the half of it for the second part
5540     unsigned SecondHalfAlignment =
5541       (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
5542          Alignment/2 : Alignment;
5543 
5544     EVT LoMemVT, HiMemVT;
5545     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5546 
5547     SDValue DataLo, DataHi;
5548     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
5549 
5550     MachineMemOperand *MMO = DAG.getMachineFunction().
5551       getMachineMemOperand(MST->getPointerInfo(),
5552                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
5553                            Alignment, MST->getAAInfo(), MST->getRanges());
5554 
5555     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
5556                             MST->isTruncatingStore());
5557 
5558     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
5559     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5560                       DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
5561 
5562     MMO = DAG.getMachineFunction().
5563       getMachineMemOperand(MST->getPointerInfo(),
5564                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
5565                            SecondHalfAlignment, MST->getAAInfo(),
5566                            MST->getRanges());
5567 
5568     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
5569                             MST->isTruncatingStore());
5570 
5571     AddToWorklist(Lo.getNode());
5572     AddToWorklist(Hi.getNode());
5573 
5574     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
5575   }
5576   return SDValue();
5577 }
5578 
5579 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
5580 
5581   if (Level >= AfterLegalizeTypes)
5582     return SDValue();
5583 
5584   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
5585   SDValue Mask = MGT->getMask();
5586   SDLoc DL(N);
5587 
5588   // If the MGATHER result requires splitting and the mask is provided by a
5589   // SETCC, then split both nodes and its operands before legalization. This
5590   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5591   // and enables future optimizations (e.g. min/max pattern matching on X86).
5592 
5593   if (Mask.getOpcode() != ISD::SETCC)
5594     return SDValue();
5595 
5596   EVT VT = N->getValueType(0);
5597 
5598   // Check if any splitting is required.
5599   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5600       TargetLowering::TypeSplitVector)
5601     return SDValue();
5602 
5603   SDValue MaskLo, MaskHi, Lo, Hi;
5604   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5605 
5606   SDValue Src0 = MGT->getValue();
5607   SDValue Src0Lo, Src0Hi;
5608   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
5609 
5610   EVT LoVT, HiVT;
5611   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
5612 
5613   SDValue Chain = MGT->getChain();
5614   EVT MemoryVT = MGT->getMemoryVT();
5615   unsigned Alignment = MGT->getOriginalAlignment();
5616 
5617   EVT LoMemVT, HiMemVT;
5618   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5619 
5620   SDValue BasePtr = MGT->getBasePtr();
5621   SDValue Index = MGT->getIndex();
5622   SDValue IndexLo, IndexHi;
5623   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
5624 
5625   MachineMemOperand *MMO = DAG.getMachineFunction().
5626     getMachineMemOperand(MGT->getPointerInfo(),
5627                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
5628                           Alignment, MGT->getAAInfo(), MGT->getRanges());
5629 
5630   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
5631   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
5632                             MMO);
5633 
5634   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
5635   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
5636                             MMO);
5637 
5638   AddToWorklist(Lo.getNode());
5639   AddToWorklist(Hi.getNode());
5640 
5641   // Build a factor node to remember that this load is independent of the
5642   // other one.
5643   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
5644                       Hi.getValue(1));
5645 
5646   // Legalized the chain result - switch anything that used the old chain to
5647   // use the new one.
5648   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
5649 
5650   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5651 
5652   SDValue RetOps[] = { GatherRes, Chain };
5653   return DAG.getMergeValues(RetOps, DL);
5654 }
5655 
5656 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
5657 
5658   if (Level >= AfterLegalizeTypes)
5659     return SDValue();
5660 
5661   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
5662   SDValue Mask = MLD->getMask();
5663   SDLoc DL(N);
5664 
5665   // If the MLOAD result requires splitting and the mask is provided by a
5666   // SETCC, then split both nodes and its operands before legalization. This
5667   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5668   // and enables future optimizations (e.g. min/max pattern matching on X86).
5669 
5670   if (Mask.getOpcode() == ISD::SETCC) {
5671     EVT VT = N->getValueType(0);
5672 
5673     // Check if any splitting is required.
5674     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5675         TargetLowering::TypeSplitVector)
5676       return SDValue();
5677 
5678     SDValue MaskLo, MaskHi, Lo, Hi;
5679     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5680 
5681     SDValue Src0 = MLD->getSrc0();
5682     SDValue Src0Lo, Src0Hi;
5683     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
5684 
5685     EVT LoVT, HiVT;
5686     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
5687 
5688     SDValue Chain = MLD->getChain();
5689     SDValue Ptr   = MLD->getBasePtr();
5690     EVT MemoryVT = MLD->getMemoryVT();
5691     unsigned Alignment = MLD->getOriginalAlignment();
5692 
5693     // if Alignment is equal to the vector size,
5694     // take the half of it for the second part
5695     unsigned SecondHalfAlignment =
5696       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
5697          Alignment/2 : Alignment;
5698 
5699     EVT LoMemVT, HiMemVT;
5700     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5701 
5702     MachineMemOperand *MMO = DAG.getMachineFunction().
5703     getMachineMemOperand(MLD->getPointerInfo(),
5704                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
5705                          Alignment, MLD->getAAInfo(), MLD->getRanges());
5706 
5707     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
5708                            ISD::NON_EXTLOAD);
5709 
5710     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
5711     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5712                       DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
5713 
5714     MMO = DAG.getMachineFunction().
5715     getMachineMemOperand(MLD->getPointerInfo(),
5716                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
5717                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
5718 
5719     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
5720                            ISD::NON_EXTLOAD);
5721 
5722     AddToWorklist(Lo.getNode());
5723     AddToWorklist(Hi.getNode());
5724 
5725     // Build a factor node to remember that this load is independent of the
5726     // other one.
5727     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
5728                         Hi.getValue(1));
5729 
5730     // Legalized the chain result - switch anything that used the old chain to
5731     // use the new one.
5732     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
5733 
5734     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5735 
5736     SDValue RetOps[] = { LoadRes, Chain };
5737     return DAG.getMergeValues(RetOps, DL);
5738   }
5739   return SDValue();
5740 }
5741 
5742 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
5743   SDValue N0 = N->getOperand(0);
5744   SDValue N1 = N->getOperand(1);
5745   SDValue N2 = N->getOperand(2);
5746   SDLoc DL(N);
5747 
5748   // Canonicalize integer abs.
5749   // vselect (setg[te] X,  0),  X, -X ->
5750   // vselect (setgt    X, -1),  X, -X ->
5751   // vselect (setl[te] X,  0), -X,  X ->
5752   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
5753   if (N0.getOpcode() == ISD::SETCC) {
5754     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5755     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5756     bool isAbs = false;
5757     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
5758 
5759     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
5760          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
5761         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
5762       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
5763     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
5764              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
5765       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5766 
5767     if (isAbs) {
5768       EVT VT = LHS.getValueType();
5769       SDValue Shift = DAG.getNode(
5770           ISD::SRA, DL, VT, LHS,
5771           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
5772       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
5773       AddToWorklist(Shift.getNode());
5774       AddToWorklist(Add.getNode());
5775       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
5776     }
5777   }
5778 
5779   if (SimplifySelectOps(N, N1, N2))
5780     return SDValue(N, 0);  // Don't revisit N.
5781 
5782   // If the VSELECT result requires splitting and the mask is provided by a
5783   // SETCC, then split both nodes and its operands before legalization. This
5784   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5785   // and enables future optimizations (e.g. min/max pattern matching on X86).
5786   if (N0.getOpcode() == ISD::SETCC) {
5787     EVT VT = N->getValueType(0);
5788 
5789     // Check if any splitting is required.
5790     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5791         TargetLowering::TypeSplitVector)
5792       return SDValue();
5793 
5794     SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
5795     std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
5796     std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
5797     std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
5798 
5799     Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
5800     Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
5801 
5802     // Add the new VSELECT nodes to the work list in case they need to be split
5803     // again.
5804     AddToWorklist(Lo.getNode());
5805     AddToWorklist(Hi.getNode());
5806 
5807     return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5808   }
5809 
5810   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
5811   if (ISD::isBuildVectorAllOnes(N0.getNode()))
5812     return N1;
5813   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
5814   if (ISD::isBuildVectorAllZeros(N0.getNode()))
5815     return N2;
5816 
5817   // The ConvertSelectToConcatVector function is assuming both the above
5818   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
5819   // and addressed.
5820   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
5821       N2.getOpcode() == ISD::CONCAT_VECTORS &&
5822       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
5823     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
5824       return CV;
5825   }
5826 
5827   return SDValue();
5828 }
5829 
5830 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
5831   SDValue N0 = N->getOperand(0);
5832   SDValue N1 = N->getOperand(1);
5833   SDValue N2 = N->getOperand(2);
5834   SDValue N3 = N->getOperand(3);
5835   SDValue N4 = N->getOperand(4);
5836   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
5837 
5838   // fold select_cc lhs, rhs, x, x, cc -> x
5839   if (N2 == N3)
5840     return N2;
5841 
5842   // Determine if the condition we're dealing with is constant
5843   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
5844                                   CC, SDLoc(N), false)) {
5845     AddToWorklist(SCC.getNode());
5846 
5847     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
5848       if (!SCCC->isNullValue())
5849         return N2;    // cond always true -> true val
5850       else
5851         return N3;    // cond always false -> false val
5852     } else if (SCC->isUndef()) {
5853       // When the condition is UNDEF, just return the first operand. This is
5854       // coherent the DAG creation, no setcc node is created in this case
5855       return N2;
5856     } else if (SCC.getOpcode() == ISD::SETCC) {
5857       // Fold to a simpler select_cc
5858       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
5859                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
5860                          SCC.getOperand(2));
5861     }
5862   }
5863 
5864   // If we can fold this based on the true/false value, do so.
5865   if (SimplifySelectOps(N, N2, N3))
5866     return SDValue(N, 0);  // Don't revisit N.
5867 
5868   // fold select_cc into other things, such as min/max/abs
5869   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
5870 }
5871 
5872 SDValue DAGCombiner::visitSETCC(SDNode *N) {
5873   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
5874                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
5875                        SDLoc(N));
5876 }
5877 
5878 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
5879   SDValue LHS = N->getOperand(0);
5880   SDValue RHS = N->getOperand(1);
5881   SDValue Carry = N->getOperand(2);
5882   SDValue Cond = N->getOperand(3);
5883 
5884   // If Carry is false, fold to a regular SETCC.
5885   if (Carry.getOpcode() == ISD::CARRY_FALSE)
5886     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
5887 
5888   return SDValue();
5889 }
5890 
5891 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
5892 /// a build_vector of constants.
5893 /// This function is called by the DAGCombiner when visiting sext/zext/aext
5894 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
5895 /// Vector extends are not folded if operations are legal; this is to
5896 /// avoid introducing illegal build_vector dag nodes.
5897 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
5898                                          SelectionDAG &DAG, bool LegalTypes,
5899                                          bool LegalOperations) {
5900   unsigned Opcode = N->getOpcode();
5901   SDValue N0 = N->getOperand(0);
5902   EVT VT = N->getValueType(0);
5903 
5904   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
5905          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
5906          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
5907          && "Expected EXTEND dag node in input!");
5908 
5909   // fold (sext c1) -> c1
5910   // fold (zext c1) -> c1
5911   // fold (aext c1) -> c1
5912   if (isa<ConstantSDNode>(N0))
5913     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
5914 
5915   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
5916   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
5917   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
5918   EVT SVT = VT.getScalarType();
5919   if (!(VT.isVector() &&
5920       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
5921       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
5922     return nullptr;
5923 
5924   // We can fold this node into a build_vector.
5925   unsigned VTBits = SVT.getSizeInBits();
5926   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
5927   SmallVector<SDValue, 8> Elts;
5928   unsigned NumElts = VT.getVectorNumElements();
5929   SDLoc DL(N);
5930 
5931   for (unsigned i=0; i != NumElts; ++i) {
5932     SDValue Op = N0->getOperand(i);
5933     if (Op->isUndef()) {
5934       Elts.push_back(DAG.getUNDEF(SVT));
5935       continue;
5936     }
5937 
5938     SDLoc DL(Op);
5939     // Get the constant value and if needed trunc it to the size of the type.
5940     // Nodes like build_vector might have constants wider than the scalar type.
5941     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
5942     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
5943       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
5944     else
5945       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
5946   }
5947 
5948   return DAG.getBuildVector(VT, DL, Elts).getNode();
5949 }
5950 
5951 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
5952 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
5953 // transformation. Returns true if extension are possible and the above
5954 // mentioned transformation is profitable.
5955 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
5956                                     unsigned ExtOpc,
5957                                     SmallVectorImpl<SDNode *> &ExtendNodes,
5958                                     const TargetLowering &TLI) {
5959   bool HasCopyToRegUses = false;
5960   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
5961   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
5962                             UE = N0.getNode()->use_end();
5963        UI != UE; ++UI) {
5964     SDNode *User = *UI;
5965     if (User == N)
5966       continue;
5967     if (UI.getUse().getResNo() != N0.getResNo())
5968       continue;
5969     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
5970     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
5971       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
5972       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
5973         // Sign bits will be lost after a zext.
5974         return false;
5975       bool Add = false;
5976       for (unsigned i = 0; i != 2; ++i) {
5977         SDValue UseOp = User->getOperand(i);
5978         if (UseOp == N0)
5979           continue;
5980         if (!isa<ConstantSDNode>(UseOp))
5981           return false;
5982         Add = true;
5983       }
5984       if (Add)
5985         ExtendNodes.push_back(User);
5986       continue;
5987     }
5988     // If truncates aren't free and there are users we can't
5989     // extend, it isn't worthwhile.
5990     if (!isTruncFree)
5991       return false;
5992     // Remember if this value is live-out.
5993     if (User->getOpcode() == ISD::CopyToReg)
5994       HasCopyToRegUses = true;
5995   }
5996 
5997   if (HasCopyToRegUses) {
5998     bool BothLiveOut = false;
5999     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6000          UI != UE; ++UI) {
6001       SDUse &Use = UI.getUse();
6002       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
6003         BothLiveOut = true;
6004         break;
6005       }
6006     }
6007     if (BothLiveOut)
6008       // Both unextended and extended values are live out. There had better be
6009       // a good reason for the transformation.
6010       return ExtendNodes.size();
6011   }
6012   return true;
6013 }
6014 
6015 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
6016                                   SDValue Trunc, SDValue ExtLoad,
6017                                   const SDLoc &DL, ISD::NodeType ExtType) {
6018   // Extend SetCC uses if necessary.
6019   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
6020     SDNode *SetCC = SetCCs[i];
6021     SmallVector<SDValue, 4> Ops;
6022 
6023     for (unsigned j = 0; j != 2; ++j) {
6024       SDValue SOp = SetCC->getOperand(j);
6025       if (SOp == Trunc)
6026         Ops.push_back(ExtLoad);
6027       else
6028         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
6029     }
6030 
6031     Ops.push_back(SetCC->getOperand(2));
6032     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
6033   }
6034 }
6035 
6036 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
6037 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
6038   SDValue N0 = N->getOperand(0);
6039   EVT DstVT = N->getValueType(0);
6040   EVT SrcVT = N0.getValueType();
6041 
6042   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
6043           N->getOpcode() == ISD::ZERO_EXTEND) &&
6044          "Unexpected node type (not an extend)!");
6045 
6046   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
6047   // For example, on a target with legal v4i32, but illegal v8i32, turn:
6048   //   (v8i32 (sext (v8i16 (load x))))
6049   // into:
6050   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
6051   //                          (v4i32 (sextload (x + 16)))))
6052   // Where uses of the original load, i.e.:
6053   //   (v8i16 (load x))
6054   // are replaced with:
6055   //   (v8i16 (truncate
6056   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
6057   //                            (v4i32 (sextload (x + 16)))))))
6058   //
6059   // This combine is only applicable to illegal, but splittable, vectors.
6060   // All legal types, and illegal non-vector types, are handled elsewhere.
6061   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
6062   //
6063   if (N0->getOpcode() != ISD::LOAD)
6064     return SDValue();
6065 
6066   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6067 
6068   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
6069       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
6070       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
6071     return SDValue();
6072 
6073   SmallVector<SDNode *, 4> SetCCs;
6074   if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
6075     return SDValue();
6076 
6077   ISD::LoadExtType ExtType =
6078       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
6079 
6080   // Try to split the vector types to get down to legal types.
6081   EVT SplitSrcVT = SrcVT;
6082   EVT SplitDstVT = DstVT;
6083   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
6084          SplitSrcVT.getVectorNumElements() > 1) {
6085     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
6086     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
6087   }
6088 
6089   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
6090     return SDValue();
6091 
6092   SDLoc DL(N);
6093   const unsigned NumSplits =
6094       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
6095   const unsigned Stride = SplitSrcVT.getStoreSize();
6096   SmallVector<SDValue, 4> Loads;
6097   SmallVector<SDValue, 4> Chains;
6098 
6099   SDValue BasePtr = LN0->getBasePtr();
6100   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
6101     const unsigned Offset = Idx * Stride;
6102     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
6103 
6104     SDValue SplitLoad = DAG.getExtLoad(
6105         ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
6106         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
6107         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
6108 
6109     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
6110                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
6111 
6112     Loads.push_back(SplitLoad.getValue(0));
6113     Chains.push_back(SplitLoad.getValue(1));
6114   }
6115 
6116   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
6117   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
6118 
6119   CombineTo(N, NewValue);
6120 
6121   // Replace uses of the original load (before extension)
6122   // with a truncate of the concatenated sextloaded vectors.
6123   SDValue Trunc =
6124       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
6125   CombineTo(N0.getNode(), Trunc, NewChain);
6126   ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
6127                   (ISD::NodeType)N->getOpcode());
6128   return SDValue(N, 0); // Return N so it doesn't get rechecked!
6129 }
6130 
6131 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
6132   SDValue N0 = N->getOperand(0);
6133   EVT VT = N->getValueType(0);
6134 
6135   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6136                                               LegalOperations))
6137     return SDValue(Res, 0);
6138 
6139   // fold (sext (sext x)) -> (sext x)
6140   // fold (sext (aext x)) -> (sext x)
6141   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
6142     return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
6143                        N0.getOperand(0));
6144 
6145   if (N0.getOpcode() == ISD::TRUNCATE) {
6146     // fold (sext (truncate (load x))) -> (sext (smaller load x))
6147     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
6148     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6149       SDNode* oye = N0.getNode()->getOperand(0).getNode();
6150       if (NarrowLoad.getNode() != N0.getNode()) {
6151         CombineTo(N0.getNode(), NarrowLoad);
6152         // CombineTo deleted the truncate, if needed, but not what's under it.
6153         AddToWorklist(oye);
6154       }
6155       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6156     }
6157 
6158     // See if the value being truncated is already sign extended.  If so, just
6159     // eliminate the trunc/sext pair.
6160     SDValue Op = N0.getOperand(0);
6161     unsigned OpBits   = Op.getScalarValueSizeInBits();
6162     unsigned MidBits  = N0.getScalarValueSizeInBits();
6163     unsigned DestBits = VT.getScalarSizeInBits();
6164     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
6165 
6166     if (OpBits == DestBits) {
6167       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
6168       // bits, it is already ready.
6169       if (NumSignBits > DestBits-MidBits)
6170         return Op;
6171     } else if (OpBits < DestBits) {
6172       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
6173       // bits, just sext from i32.
6174       if (NumSignBits > OpBits-MidBits)
6175         return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
6176     } else {
6177       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
6178       // bits, just truncate to i32.
6179       if (NumSignBits > OpBits-MidBits)
6180         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6181     }
6182 
6183     // fold (sext (truncate x)) -> (sextinreg x).
6184     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
6185                                                  N0.getValueType())) {
6186       if (OpBits < DestBits)
6187         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
6188       else if (OpBits > DestBits)
6189         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
6190       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
6191                          DAG.getValueType(N0.getValueType()));
6192     }
6193   }
6194 
6195   // fold (sext (load x)) -> (sext (truncate (sextload x)))
6196   // Only generate vector extloads when 1) they're legal, and 2) they are
6197   // deemed desirable by the target.
6198   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6199       ((!LegalOperations && !VT.isVector() &&
6200         !cast<LoadSDNode>(N0)->isVolatile()) ||
6201        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
6202     bool DoXform = true;
6203     SmallVector<SDNode*, 4> SetCCs;
6204     if (!N0.hasOneUse())
6205       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
6206     if (VT.isVector())
6207       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
6208     if (DoXform) {
6209       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6210       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
6211                                        LN0->getChain(),
6212                                        LN0->getBasePtr(), N0.getValueType(),
6213                                        LN0->getMemOperand());
6214       CombineTo(N, ExtLoad);
6215       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6216                                   N0.getValueType(), ExtLoad);
6217       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6218       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6219                       ISD::SIGN_EXTEND);
6220       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6221     }
6222   }
6223 
6224   // fold (sext (load x)) to multiple smaller sextloads.
6225   // Only on illegal but splittable vectors.
6226   if (SDValue ExtLoad = CombineExtLoad(N))
6227     return ExtLoad;
6228 
6229   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
6230   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
6231   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
6232       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
6233     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6234     EVT MemVT = LN0->getMemoryVT();
6235     if ((!LegalOperations && !LN0->isVolatile()) ||
6236         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
6237       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
6238                                        LN0->getChain(),
6239                                        LN0->getBasePtr(), MemVT,
6240                                        LN0->getMemOperand());
6241       CombineTo(N, ExtLoad);
6242       CombineTo(N0.getNode(),
6243                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6244                             N0.getValueType(), ExtLoad),
6245                 ExtLoad.getValue(1));
6246       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6247     }
6248   }
6249 
6250   // fold (sext (and/or/xor (load x), cst)) ->
6251   //      (and/or/xor (sextload x), (sext cst))
6252   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
6253        N0.getOpcode() == ISD::XOR) &&
6254       isa<LoadSDNode>(N0.getOperand(0)) &&
6255       N0.getOperand(1).getOpcode() == ISD::Constant &&
6256       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
6257       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
6258     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
6259     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
6260       bool DoXform = true;
6261       SmallVector<SDNode*, 4> SetCCs;
6262       if (!N0.hasOneUse())
6263         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
6264                                           SetCCs, TLI);
6265       if (DoXform) {
6266         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
6267                                          LN0->getChain(), LN0->getBasePtr(),
6268                                          LN0->getMemoryVT(),
6269                                          LN0->getMemOperand());
6270         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6271         Mask = Mask.sext(VT.getSizeInBits());
6272         SDLoc DL(N);
6273         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
6274                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
6275         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
6276                                     SDLoc(N0.getOperand(0)),
6277                                     N0.getOperand(0).getValueType(), ExtLoad);
6278         CombineTo(N, And);
6279         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
6280         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
6281                         ISD::SIGN_EXTEND);
6282         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6283       }
6284     }
6285   }
6286 
6287   if (N0.getOpcode() == ISD::SETCC) {
6288     EVT N0VT = N0.getOperand(0).getValueType();
6289     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
6290     // Only do this before legalize for now.
6291     if (VT.isVector() && !LegalOperations &&
6292         TLI.getBooleanContents(N0VT) ==
6293             TargetLowering::ZeroOrNegativeOneBooleanContent) {
6294       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
6295       // of the same size as the compared operands. Only optimize sext(setcc())
6296       // if this is the case.
6297       EVT SVT = getSetCCResultType(N0VT);
6298 
6299       // We know that the # elements of the results is the same as the
6300       // # elements of the compare (and the # elements of the compare result
6301       // for that matter).  Check to see that they are the same size.  If so,
6302       // we know that the element size of the sext'd result matches the
6303       // element size of the compare operands.
6304       if (VT.getSizeInBits() == SVT.getSizeInBits())
6305         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
6306                              N0.getOperand(1),
6307                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
6308 
6309       // If the desired elements are smaller or larger than the source
6310       // elements we can use a matching integer vector type and then
6311       // truncate/sign extend
6312       EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
6313       if (SVT == MatchingVectorType) {
6314         SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
6315                                N0.getOperand(0), N0.getOperand(1),
6316                                cast<CondCodeSDNode>(N0.getOperand(2))->get());
6317         return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
6318       }
6319     }
6320 
6321     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
6322     // Here, T can be 1 or -1, depending on the type of the setcc and
6323     // getBooleanContents().
6324     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
6325 
6326     SDLoc DL(N);
6327     // To determine the "true" side of the select, we need to know the high bit
6328     // of the value returned by the setcc if it evaluates to true.
6329     // If the type of the setcc is i1, then the true case of the select is just
6330     // sext(i1 1), that is, -1.
6331     // If the type of the setcc is larger (say, i8) then the value of the high
6332     // bit depends on getBooleanContents(). So, ask TLI for a real "true" value
6333     // of the appropriate width.
6334     SDValue ExtTrueVal =
6335         (SetCCWidth == 1)
6336             ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()),
6337                               DL, VT)
6338             : TLI.getConstTrueVal(DAG, VT, DL);
6339 
6340     if (SDValue SCC = SimplifySelectCC(
6341             DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal,
6342             DAG.getConstant(0, DL, VT),
6343             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
6344       return SCC;
6345 
6346     if (!VT.isVector()) {
6347       EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
6348       if (!LegalOperations ||
6349           TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
6350         SDLoc DL(N);
6351         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6352         SDValue SetCC =
6353             DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC);
6354         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal,
6355                              DAG.getConstant(0, DL, VT));
6356       }
6357     }
6358   }
6359 
6360   // fold (sext x) -> (zext x) if the sign bit is known zero.
6361   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
6362       DAG.SignBitIsZero(N0))
6363     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
6364 
6365   return SDValue();
6366 }
6367 
6368 // isTruncateOf - If N is a truncate of some other value, return true, record
6369 // the value being truncated in Op and which of Op's bits are zero in KnownZero.
6370 // This function computes KnownZero to avoid a duplicated call to
6371 // computeKnownBits in the caller.
6372 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
6373                          APInt &KnownZero) {
6374   APInt KnownOne;
6375   if (N->getOpcode() == ISD::TRUNCATE) {
6376     Op = N->getOperand(0);
6377     DAG.computeKnownBits(Op, KnownZero, KnownOne);
6378     return true;
6379   }
6380 
6381   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
6382       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
6383     return false;
6384 
6385   SDValue Op0 = N->getOperand(0);
6386   SDValue Op1 = N->getOperand(1);
6387   assert(Op0.getValueType() == Op1.getValueType());
6388 
6389   if (isNullConstant(Op0))
6390     Op = Op1;
6391   else if (isNullConstant(Op1))
6392     Op = Op0;
6393   else
6394     return false;
6395 
6396   DAG.computeKnownBits(Op, KnownZero, KnownOne);
6397 
6398   if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
6399     return false;
6400 
6401   return true;
6402 }
6403 
6404 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
6405   SDValue N0 = N->getOperand(0);
6406   EVT VT = N->getValueType(0);
6407 
6408   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6409                                               LegalOperations))
6410     return SDValue(Res, 0);
6411 
6412   // fold (zext (zext x)) -> (zext x)
6413   // fold (zext (aext x)) -> (zext x)
6414   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
6415     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
6416                        N0.getOperand(0));
6417 
6418   // fold (zext (truncate x)) -> (zext x) or
6419   //      (zext (truncate x)) -> (truncate x)
6420   // This is valid when the truncated bits of x are already zero.
6421   // FIXME: We should extend this to work for vectors too.
6422   SDValue Op;
6423   APInt KnownZero;
6424   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
6425     APInt TruncatedBits =
6426       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
6427       APInt(Op.getValueSizeInBits(), 0) :
6428       APInt::getBitsSet(Op.getValueSizeInBits(),
6429                         N0.getValueSizeInBits(),
6430                         std::min(Op.getValueSizeInBits(),
6431                                  VT.getSizeInBits()));
6432     if (TruncatedBits == (KnownZero & TruncatedBits)) {
6433       if (VT.bitsGT(Op.getValueType()))
6434         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
6435       if (VT.bitsLT(Op.getValueType()))
6436         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6437 
6438       return Op;
6439     }
6440   }
6441 
6442   // fold (zext (truncate (load x))) -> (zext (smaller load x))
6443   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
6444   if (N0.getOpcode() == ISD::TRUNCATE) {
6445     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6446       SDNode* oye = N0.getNode()->getOperand(0).getNode();
6447       if (NarrowLoad.getNode() != N0.getNode()) {
6448         CombineTo(N0.getNode(), NarrowLoad);
6449         // CombineTo deleted the truncate, if needed, but not what's under it.
6450         AddToWorklist(oye);
6451       }
6452       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6453     }
6454   }
6455 
6456   // fold (zext (truncate x)) -> (and x, mask)
6457   if (N0.getOpcode() == ISD::TRUNCATE) {
6458     // fold (zext (truncate (load x))) -> (zext (smaller load x))
6459     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
6460     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6461       SDNode *oye = N0.getNode()->getOperand(0).getNode();
6462       if (NarrowLoad.getNode() != N0.getNode()) {
6463         CombineTo(N0.getNode(), NarrowLoad);
6464         // CombineTo deleted the truncate, if needed, but not what's under it.
6465         AddToWorklist(oye);
6466       }
6467       return SDValue(N, 0); // Return N so it doesn't get rechecked!
6468     }
6469 
6470     EVT SrcVT = N0.getOperand(0).getValueType();
6471     EVT MinVT = N0.getValueType();
6472 
6473     // Try to mask before the extension to avoid having to generate a larger mask,
6474     // possibly over several sub-vectors.
6475     if (SrcVT.bitsLT(VT)) {
6476       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
6477                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
6478         SDValue Op = N0.getOperand(0);
6479         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
6480         AddToWorklist(Op.getNode());
6481         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
6482       }
6483     }
6484 
6485     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
6486       SDValue Op = N0.getOperand(0);
6487       if (SrcVT.bitsLT(VT)) {
6488         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
6489         AddToWorklist(Op.getNode());
6490       } else if (SrcVT.bitsGT(VT)) {
6491         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6492         AddToWorklist(Op.getNode());
6493       }
6494       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
6495     }
6496   }
6497 
6498   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
6499   // if either of the casts is not free.
6500   if (N0.getOpcode() == ISD::AND &&
6501       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6502       N0.getOperand(1).getOpcode() == ISD::Constant &&
6503       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
6504                            N0.getValueType()) ||
6505        !TLI.isZExtFree(N0.getValueType(), VT))) {
6506     SDValue X = N0.getOperand(0).getOperand(0);
6507     if (X.getValueType().bitsLT(VT)) {
6508       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
6509     } else if (X.getValueType().bitsGT(VT)) {
6510       X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6511     }
6512     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6513     Mask = Mask.zext(VT.getSizeInBits());
6514     SDLoc DL(N);
6515     return DAG.getNode(ISD::AND, DL, VT,
6516                        X, DAG.getConstant(Mask, DL, VT));
6517   }
6518 
6519   // fold (zext (load x)) -> (zext (truncate (zextload x)))
6520   // Only generate vector extloads when 1) they're legal, and 2) they are
6521   // deemed desirable by the target.
6522   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6523       ((!LegalOperations && !VT.isVector() &&
6524         !cast<LoadSDNode>(N0)->isVolatile()) ||
6525        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
6526     bool DoXform = true;
6527     SmallVector<SDNode*, 4> SetCCs;
6528     if (!N0.hasOneUse())
6529       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
6530     if (VT.isVector())
6531       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
6532     if (DoXform) {
6533       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6534       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
6535                                        LN0->getChain(),
6536                                        LN0->getBasePtr(), N0.getValueType(),
6537                                        LN0->getMemOperand());
6538       CombineTo(N, ExtLoad);
6539       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6540                                   N0.getValueType(), ExtLoad);
6541       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6542 
6543       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6544                       ISD::ZERO_EXTEND);
6545       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6546     }
6547   }
6548 
6549   // fold (zext (load x)) to multiple smaller zextloads.
6550   // Only on illegal but splittable vectors.
6551   if (SDValue ExtLoad = CombineExtLoad(N))
6552     return ExtLoad;
6553 
6554   // fold (zext (and/or/xor (load x), cst)) ->
6555   //      (and/or/xor (zextload x), (zext cst))
6556   // Unless (and (load x) cst) will match as a zextload already and has
6557   // additional users.
6558   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
6559        N0.getOpcode() == ISD::XOR) &&
6560       isa<LoadSDNode>(N0.getOperand(0)) &&
6561       N0.getOperand(1).getOpcode() == ISD::Constant &&
6562       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
6563       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
6564     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
6565     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
6566       bool DoXform = true;
6567       SmallVector<SDNode*, 4> SetCCs;
6568       if (!N0.hasOneUse()) {
6569         if (N0.getOpcode() == ISD::AND) {
6570           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
6571           auto NarrowLoad = false;
6572           EVT LoadResultTy = AndC->getValueType(0);
6573           EVT ExtVT, LoadedVT;
6574           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
6575                                NarrowLoad))
6576             DoXform = false;
6577         }
6578         if (DoXform)
6579           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
6580                                             ISD::ZERO_EXTEND, SetCCs, TLI);
6581       }
6582       if (DoXform) {
6583         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
6584                                          LN0->getChain(), LN0->getBasePtr(),
6585                                          LN0->getMemoryVT(),
6586                                          LN0->getMemOperand());
6587         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6588         Mask = Mask.zext(VT.getSizeInBits());
6589         SDLoc DL(N);
6590         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
6591                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
6592         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
6593                                     SDLoc(N0.getOperand(0)),
6594                                     N0.getOperand(0).getValueType(), ExtLoad);
6595         CombineTo(N, And);
6596         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
6597         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
6598                         ISD::ZERO_EXTEND);
6599         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6600       }
6601     }
6602   }
6603 
6604   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
6605   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
6606   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
6607       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
6608     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6609     EVT MemVT = LN0->getMemoryVT();
6610     if ((!LegalOperations && !LN0->isVolatile()) ||
6611         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
6612       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
6613                                        LN0->getChain(),
6614                                        LN0->getBasePtr(), MemVT,
6615                                        LN0->getMemOperand());
6616       CombineTo(N, ExtLoad);
6617       CombineTo(N0.getNode(),
6618                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
6619                             ExtLoad),
6620                 ExtLoad.getValue(1));
6621       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6622     }
6623   }
6624 
6625   if (N0.getOpcode() == ISD::SETCC) {
6626     // Only do this before legalize for now.
6627     if (!LegalOperations && VT.isVector() &&
6628         N0.getValueType().getVectorElementType() == MVT::i1) {
6629       EVT N00VT = N0.getOperand(0).getValueType();
6630       if (getSetCCResultType(N00VT) == N0.getValueType())
6631         return SDValue();
6632 
6633       // We know that the # elements of the results is the same as the #
6634       // elements of the compare (and the # elements of the compare result for
6635       // that matter). Check to see that they are the same size. If so, we know
6636       // that the element size of the sext'd result matches the element size of
6637       // the compare operands.
6638       SDLoc DL(N);
6639       SDValue VecOnes = DAG.getConstant(1, DL, VT);
6640       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
6641         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
6642         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
6643                                      N0.getOperand(1), N0.getOperand(2));
6644         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
6645       }
6646 
6647       // If the desired elements are smaller or larger than the source
6648       // elements we can use a matching integer vector type and then
6649       // truncate/sign extend.
6650       EVT MatchingElementType = EVT::getIntegerVT(
6651           *DAG.getContext(), N00VT.getScalarSizeInBits());
6652       EVT MatchingVectorType = EVT::getVectorVT(
6653           *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
6654       SDValue VsetCC =
6655           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
6656                       N0.getOperand(1), N0.getOperand(2));
6657       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
6658                          VecOnes);
6659     }
6660 
6661     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
6662     SDLoc DL(N);
6663     if (SDValue SCC = SimplifySelectCC(
6664             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
6665             DAG.getConstant(0, DL, VT),
6666             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
6667       return SCC;
6668   }
6669 
6670   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
6671   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
6672       isa<ConstantSDNode>(N0.getOperand(1)) &&
6673       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
6674       N0.hasOneUse()) {
6675     SDValue ShAmt = N0.getOperand(1);
6676     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
6677     if (N0.getOpcode() == ISD::SHL) {
6678       SDValue InnerZExt = N0.getOperand(0);
6679       // If the original shl may be shifting out bits, do not perform this
6680       // transformation.
6681       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
6682         InnerZExt.getOperand(0).getValueSizeInBits();
6683       if (ShAmtVal > KnownZeroBits)
6684         return SDValue();
6685     }
6686 
6687     SDLoc DL(N);
6688 
6689     // Ensure that the shift amount is wide enough for the shifted value.
6690     if (VT.getSizeInBits() >= 256)
6691       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
6692 
6693     return DAG.getNode(N0.getOpcode(), DL, VT,
6694                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
6695                        ShAmt);
6696   }
6697 
6698   return SDValue();
6699 }
6700 
6701 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
6702   SDValue N0 = N->getOperand(0);
6703   EVT VT = N->getValueType(0);
6704 
6705   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6706                                               LegalOperations))
6707     return SDValue(Res, 0);
6708 
6709   // fold (aext (aext x)) -> (aext x)
6710   // fold (aext (zext x)) -> (zext x)
6711   // fold (aext (sext x)) -> (sext x)
6712   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
6713       N0.getOpcode() == ISD::ZERO_EXTEND ||
6714       N0.getOpcode() == ISD::SIGN_EXTEND)
6715     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
6716 
6717   // fold (aext (truncate (load x))) -> (aext (smaller load x))
6718   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
6719   if (N0.getOpcode() == ISD::TRUNCATE) {
6720     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6721       SDNode* oye = N0.getNode()->getOperand(0).getNode();
6722       if (NarrowLoad.getNode() != N0.getNode()) {
6723         CombineTo(N0.getNode(), NarrowLoad);
6724         // CombineTo deleted the truncate, if needed, but not what's under it.
6725         AddToWorklist(oye);
6726       }
6727       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6728     }
6729   }
6730 
6731   // fold (aext (truncate x))
6732   if (N0.getOpcode() == ISD::TRUNCATE) {
6733     SDValue TruncOp = N0.getOperand(0);
6734     if (TruncOp.getValueType() == VT)
6735       return TruncOp; // x iff x size == zext size.
6736     if (TruncOp.getValueType().bitsGT(VT))
6737       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
6738     return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
6739   }
6740 
6741   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
6742   // if the trunc is not free.
6743   if (N0.getOpcode() == ISD::AND &&
6744       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6745       N0.getOperand(1).getOpcode() == ISD::Constant &&
6746       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
6747                           N0.getValueType())) {
6748     SDValue X = N0.getOperand(0).getOperand(0);
6749     if (X.getValueType().bitsLT(VT)) {
6750       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
6751     } else if (X.getValueType().bitsGT(VT)) {
6752       X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
6753     }
6754     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6755     Mask = Mask.zext(VT.getSizeInBits());
6756     SDLoc DL(N);
6757     return DAG.getNode(ISD::AND, DL, VT,
6758                        X, DAG.getConstant(Mask, DL, VT));
6759   }
6760 
6761   // fold (aext (load x)) -> (aext (truncate (extload x)))
6762   // None of the supported targets knows how to perform load and any_ext
6763   // on vectors in one instruction.  We only perform this transformation on
6764   // scalars.
6765   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
6766       ISD::isUNINDEXEDLoad(N0.getNode()) &&
6767       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
6768     bool DoXform = true;
6769     SmallVector<SDNode*, 4> SetCCs;
6770     if (!N0.hasOneUse())
6771       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
6772     if (DoXform) {
6773       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6774       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
6775                                        LN0->getChain(),
6776                                        LN0->getBasePtr(), N0.getValueType(),
6777                                        LN0->getMemOperand());
6778       CombineTo(N, ExtLoad);
6779       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6780                                   N0.getValueType(), ExtLoad);
6781       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6782       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6783                       ISD::ANY_EXTEND);
6784       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6785     }
6786   }
6787 
6788   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
6789   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
6790   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
6791   if (N0.getOpcode() == ISD::LOAD &&
6792       !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6793       N0.hasOneUse()) {
6794     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6795     ISD::LoadExtType ExtType = LN0->getExtensionType();
6796     EVT MemVT = LN0->getMemoryVT();
6797     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
6798       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
6799                                        VT, LN0->getChain(), LN0->getBasePtr(),
6800                                        MemVT, LN0->getMemOperand());
6801       CombineTo(N, ExtLoad);
6802       CombineTo(N0.getNode(),
6803                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6804                             N0.getValueType(), ExtLoad),
6805                 ExtLoad.getValue(1));
6806       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6807     }
6808   }
6809 
6810   if (N0.getOpcode() == ISD::SETCC) {
6811     // For vectors:
6812     // aext(setcc) -> vsetcc
6813     // aext(setcc) -> truncate(vsetcc)
6814     // aext(setcc) -> aext(vsetcc)
6815     // Only do this before legalize for now.
6816     if (VT.isVector() && !LegalOperations) {
6817       EVT N0VT = N0.getOperand(0).getValueType();
6818         // We know that the # elements of the results is the same as the
6819         // # elements of the compare (and the # elements of the compare result
6820         // for that matter).  Check to see that they are the same size.  If so,
6821         // we know that the element size of the sext'd result matches the
6822         // element size of the compare operands.
6823       if (VT.getSizeInBits() == N0VT.getSizeInBits())
6824         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
6825                              N0.getOperand(1),
6826                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
6827       // If the desired elements are smaller or larger than the source
6828       // elements we can use a matching integer vector type and then
6829       // truncate/any extend
6830       else {
6831         EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
6832         SDValue VsetCC =
6833           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
6834                         N0.getOperand(1),
6835                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
6836         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
6837       }
6838     }
6839 
6840     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
6841     SDLoc DL(N);
6842     if (SDValue SCC = SimplifySelectCC(
6843             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
6844             DAG.getConstant(0, DL, VT),
6845             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
6846       return SCC;
6847   }
6848 
6849   return SDValue();
6850 }
6851 
6852 /// See if the specified operand can be simplified with the knowledge that only
6853 /// the bits specified by Mask are used.  If so, return the simpler operand,
6854 /// otherwise return a null SDValue.
6855 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
6856   switch (V.getOpcode()) {
6857   default: break;
6858   case ISD::Constant: {
6859     const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
6860     assert(CV && "Const value should be ConstSDNode.");
6861     const APInt &CVal = CV->getAPIntValue();
6862     APInt NewVal = CVal & Mask;
6863     if (NewVal != CVal)
6864       return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
6865     break;
6866   }
6867   case ISD::OR:
6868   case ISD::XOR:
6869     // If the LHS or RHS don't contribute bits to the or, drop them.
6870     if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
6871       return V.getOperand(1);
6872     if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
6873       return V.getOperand(0);
6874     break;
6875   case ISD::SRL:
6876     // Only look at single-use SRLs.
6877     if (!V.getNode()->hasOneUse())
6878       break;
6879     if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
6880       // See if we can recursively simplify the LHS.
6881       unsigned Amt = RHSC->getZExtValue();
6882 
6883       // Watch out for shift count overflow though.
6884       if (Amt >= Mask.getBitWidth()) break;
6885       APInt NewMask = Mask << Amt;
6886       if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
6887         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
6888                            SimplifyLHS, V.getOperand(1));
6889     }
6890   }
6891   return SDValue();
6892 }
6893 
6894 /// If the result of a wider load is shifted to right of N  bits and then
6895 /// truncated to a narrower type and where N is a multiple of number of bits of
6896 /// the narrower type, transform it to a narrower load from address + N / num of
6897 /// bits of new type. If the result is to be extended, also fold the extension
6898 /// to form a extending load.
6899 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
6900   unsigned Opc = N->getOpcode();
6901 
6902   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
6903   SDValue N0 = N->getOperand(0);
6904   EVT VT = N->getValueType(0);
6905   EVT ExtVT = VT;
6906 
6907   // This transformation isn't valid for vector loads.
6908   if (VT.isVector())
6909     return SDValue();
6910 
6911   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
6912   // extended to VT.
6913   if (Opc == ISD::SIGN_EXTEND_INREG) {
6914     ExtType = ISD::SEXTLOAD;
6915     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6916   } else if (Opc == ISD::SRL) {
6917     // Another special-case: SRL is basically zero-extending a narrower value.
6918     ExtType = ISD::ZEXTLOAD;
6919     N0 = SDValue(N, 0);
6920     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6921     if (!N01) return SDValue();
6922     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
6923                               VT.getSizeInBits() - N01->getZExtValue());
6924   }
6925   if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
6926     return SDValue();
6927 
6928   unsigned EVTBits = ExtVT.getSizeInBits();
6929 
6930   // Do not generate loads of non-round integer types since these can
6931   // be expensive (and would be wrong if the type is not byte sized).
6932   if (!ExtVT.isRound())
6933     return SDValue();
6934 
6935   unsigned ShAmt = 0;
6936   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
6937     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6938       ShAmt = N01->getZExtValue();
6939       // Is the shift amount a multiple of size of VT?
6940       if ((ShAmt & (EVTBits-1)) == 0) {
6941         N0 = N0.getOperand(0);
6942         // Is the load width a multiple of size of VT?
6943         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
6944           return SDValue();
6945       }
6946 
6947       // At this point, we must have a load or else we can't do the transform.
6948       if (!isa<LoadSDNode>(N0)) return SDValue();
6949 
6950       // Because a SRL must be assumed to *need* to zero-extend the high bits
6951       // (as opposed to anyext the high bits), we can't combine the zextload
6952       // lowering of SRL and an sextload.
6953       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
6954         return SDValue();
6955 
6956       // If the shift amount is larger than the input type then we're not
6957       // accessing any of the loaded bytes.  If the load was a zextload/extload
6958       // then the result of the shift+trunc is zero/undef (handled elsewhere).
6959       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
6960         return SDValue();
6961     }
6962   }
6963 
6964   // If the load is shifted left (and the result isn't shifted back right),
6965   // we can fold the truncate through the shift.
6966   unsigned ShLeftAmt = 0;
6967   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
6968       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
6969     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6970       ShLeftAmt = N01->getZExtValue();
6971       N0 = N0.getOperand(0);
6972     }
6973   }
6974 
6975   // If we haven't found a load, we can't narrow it.  Don't transform one with
6976   // multiple uses, this would require adding a new load.
6977   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
6978     return SDValue();
6979 
6980   // Don't change the width of a volatile load.
6981   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6982   if (LN0->isVolatile())
6983     return SDValue();
6984 
6985   // Verify that we are actually reducing a load width here.
6986   if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
6987     return SDValue();
6988 
6989   // For the transform to be legal, the load must produce only two values
6990   // (the value loaded and the chain).  Don't transform a pre-increment
6991   // load, for example, which produces an extra value.  Otherwise the
6992   // transformation is not equivalent, and the downstream logic to replace
6993   // uses gets things wrong.
6994   if (LN0->getNumValues() > 2)
6995     return SDValue();
6996 
6997   // If the load that we're shrinking is an extload and we're not just
6998   // discarding the extension we can't simply shrink the load. Bail.
6999   // TODO: It would be possible to merge the extensions in some cases.
7000   if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
7001       LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
7002     return SDValue();
7003 
7004   if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
7005     return SDValue();
7006 
7007   EVT PtrType = N0.getOperand(1).getValueType();
7008 
7009   if (PtrType == MVT::Untyped || PtrType.isExtended())
7010     // It's not possible to generate a constant of extended or untyped type.
7011     return SDValue();
7012 
7013   // For big endian targets, we need to adjust the offset to the pointer to
7014   // load the correct bytes.
7015   if (DAG.getDataLayout().isBigEndian()) {
7016     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
7017     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
7018     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
7019   }
7020 
7021   uint64_t PtrOff = ShAmt / 8;
7022   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
7023   SDLoc DL(LN0);
7024   // The original load itself didn't wrap, so an offset within it doesn't.
7025   SDNodeFlags Flags;
7026   Flags.setNoUnsignedWrap(true);
7027   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
7028                                PtrType, LN0->getBasePtr(),
7029                                DAG.getConstant(PtrOff, DL, PtrType),
7030                                &Flags);
7031   AddToWorklist(NewPtr.getNode());
7032 
7033   SDValue Load;
7034   if (ExtType == ISD::NON_EXTLOAD)
7035     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
7036                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
7037                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7038   else
7039     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
7040                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
7041                           NewAlign, LN0->getMemOperand()->getFlags(),
7042                           LN0->getAAInfo());
7043 
7044   // Replace the old load's chain with the new load's chain.
7045   WorklistRemover DeadNodes(*this);
7046   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
7047 
7048   // Shift the result left, if we've swallowed a left shift.
7049   SDValue Result = Load;
7050   if (ShLeftAmt != 0) {
7051     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
7052     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
7053       ShImmTy = VT;
7054     // If the shift amount is as large as the result size (but, presumably,
7055     // no larger than the source) then the useful bits of the result are
7056     // zero; we can't simply return the shortened shift, because the result
7057     // of that operation is undefined.
7058     SDLoc DL(N0);
7059     if (ShLeftAmt >= VT.getSizeInBits())
7060       Result = DAG.getConstant(0, DL, VT);
7061     else
7062       Result = DAG.getNode(ISD::SHL, DL, VT,
7063                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
7064   }
7065 
7066   // Return the new loaded value.
7067   return Result;
7068 }
7069 
7070 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
7071   SDValue N0 = N->getOperand(0);
7072   SDValue N1 = N->getOperand(1);
7073   EVT VT = N->getValueType(0);
7074   EVT EVT = cast<VTSDNode>(N1)->getVT();
7075   unsigned VTBits = VT.getScalarSizeInBits();
7076   unsigned EVTBits = EVT.getScalarSizeInBits();
7077 
7078   if (N0.isUndef())
7079     return DAG.getUNDEF(VT);
7080 
7081   // fold (sext_in_reg c1) -> c1
7082   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7083     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
7084 
7085   // If the input is already sign extended, just drop the extension.
7086   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
7087     return N0;
7088 
7089   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
7090   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
7091       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
7092     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7093                        N0.getOperand(0), N1);
7094 
7095   // fold (sext_in_reg (sext x)) -> (sext x)
7096   // fold (sext_in_reg (aext x)) -> (sext x)
7097   // if x is small enough.
7098   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
7099     SDValue N00 = N0.getOperand(0);
7100     if (N00.getScalarValueSizeInBits() <= EVTBits &&
7101         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
7102       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
7103   }
7104 
7105   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
7106   if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
7107     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
7108 
7109   // fold operands of sext_in_reg based on knowledge that the top bits are not
7110   // demanded.
7111   if (SimplifyDemandedBits(SDValue(N, 0)))
7112     return SDValue(N, 0);
7113 
7114   // fold (sext_in_reg (load x)) -> (smaller sextload x)
7115   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
7116   if (SDValue NarrowLoad = ReduceLoadWidth(N))
7117     return NarrowLoad;
7118 
7119   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
7120   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
7121   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
7122   if (N0.getOpcode() == ISD::SRL) {
7123     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
7124       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
7125         // We can turn this into an SRA iff the input to the SRL is already sign
7126         // extended enough.
7127         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
7128         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
7129           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
7130                              N0.getOperand(0), N0.getOperand(1));
7131       }
7132   }
7133 
7134   // fold (sext_inreg (extload x)) -> (sextload x)
7135   if (ISD::isEXTLoad(N0.getNode()) &&
7136       ISD::isUNINDEXEDLoad(N0.getNode()) &&
7137       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
7138       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
7139        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
7140     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7141     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
7142                                      LN0->getChain(),
7143                                      LN0->getBasePtr(), EVT,
7144                                      LN0->getMemOperand());
7145     CombineTo(N, ExtLoad);
7146     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7147     AddToWorklist(ExtLoad.getNode());
7148     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7149   }
7150   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
7151   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7152       N0.hasOneUse() &&
7153       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
7154       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
7155        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
7156     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7157     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
7158                                      LN0->getChain(),
7159                                      LN0->getBasePtr(), EVT,
7160                                      LN0->getMemOperand());
7161     CombineTo(N, ExtLoad);
7162     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7163     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7164   }
7165 
7166   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
7167   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
7168     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7169                                            N0.getOperand(1), false))
7170       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7171                          BSwap, N1);
7172   }
7173 
7174   return SDValue();
7175 }
7176 
7177 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
7178   SDValue N0 = N->getOperand(0);
7179   EVT VT = N->getValueType(0);
7180 
7181   if (N0.isUndef())
7182     return DAG.getUNDEF(VT);
7183 
7184   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7185                                               LegalOperations))
7186     return SDValue(Res, 0);
7187 
7188   return SDValue();
7189 }
7190 
7191 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
7192   SDValue N0 = N->getOperand(0);
7193   EVT VT = N->getValueType(0);
7194 
7195   if (N0.isUndef())
7196     return DAG.getUNDEF(VT);
7197 
7198   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7199                                               LegalOperations))
7200     return SDValue(Res, 0);
7201 
7202   return SDValue();
7203 }
7204 
7205 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
7206   SDValue N0 = N->getOperand(0);
7207   EVT VT = N->getValueType(0);
7208   bool isLE = DAG.getDataLayout().isLittleEndian();
7209 
7210   // noop truncate
7211   if (N0.getValueType() == N->getValueType(0))
7212     return N0;
7213   // fold (truncate c1) -> c1
7214   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7215     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
7216   // fold (truncate (truncate x)) -> (truncate x)
7217   if (N0.getOpcode() == ISD::TRUNCATE)
7218     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
7219   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
7220   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
7221       N0.getOpcode() == ISD::SIGN_EXTEND ||
7222       N0.getOpcode() == ISD::ANY_EXTEND) {
7223     // if the source is smaller than the dest, we still need an extend.
7224     if (N0.getOperand(0).getValueType().bitsLT(VT))
7225       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
7226     // if the source is larger than the dest, than we just need the truncate.
7227     if (N0.getOperand(0).getValueType().bitsGT(VT))
7228       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
7229     // if the source and dest are the same type, we can drop both the extend
7230     // and the truncate.
7231     return N0.getOperand(0);
7232   }
7233 
7234   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
7235   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
7236     return SDValue();
7237 
7238   // Fold extract-and-trunc into a narrow extract. For example:
7239   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
7240   //   i32 y = TRUNCATE(i64 x)
7241   //        -- becomes --
7242   //   v16i8 b = BITCAST (v2i64 val)
7243   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
7244   //
7245   // Note: We only run this optimization after type legalization (which often
7246   // creates this pattern) and before operation legalization after which
7247   // we need to be more careful about the vector instructions that we generate.
7248   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7249       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
7250 
7251     EVT VecTy = N0.getOperand(0).getValueType();
7252     EVT ExTy = N0.getValueType();
7253     EVT TrTy = N->getValueType(0);
7254 
7255     unsigned NumElem = VecTy.getVectorNumElements();
7256     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
7257 
7258     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
7259     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
7260 
7261     SDValue EltNo = N0->getOperand(1);
7262     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
7263       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
7264       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
7265       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
7266 
7267       SDLoc DL(N);
7268       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
7269                          DAG.getBitcast(NVT, N0.getOperand(0)),
7270                          DAG.getConstant(Index, DL, IndexTy));
7271     }
7272   }
7273 
7274   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
7275   if (N0.getOpcode() == ISD::SELECT) {
7276     EVT SrcVT = N0.getValueType();
7277     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
7278         TLI.isTruncateFree(SrcVT, VT)) {
7279       SDLoc SL(N0);
7280       SDValue Cond = N0.getOperand(0);
7281       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
7282       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
7283       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
7284     }
7285   }
7286 
7287   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
7288   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
7289       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
7290       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
7291     if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
7292       uint64_t Amt = CAmt->getZExtValue();
7293       unsigned Size = VT.getScalarSizeInBits();
7294 
7295       if (Amt < Size) {
7296         SDLoc SL(N);
7297         EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
7298 
7299         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
7300         return DAG.getNode(ISD::SHL, SL, VT, Trunc,
7301                            DAG.getConstant(Amt, SL, AmtVT));
7302       }
7303     }
7304   }
7305 
7306   // Fold a series of buildvector, bitcast, and truncate if possible.
7307   // For example fold
7308   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
7309   //   (2xi32 (buildvector x, y)).
7310   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
7311       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
7312       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
7313       N0.getOperand(0).hasOneUse()) {
7314 
7315     SDValue BuildVect = N0.getOperand(0);
7316     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
7317     EVT TruncVecEltTy = VT.getVectorElementType();
7318 
7319     // Check that the element types match.
7320     if (BuildVectEltTy == TruncVecEltTy) {
7321       // Now we only need to compute the offset of the truncated elements.
7322       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
7323       unsigned TruncVecNumElts = VT.getVectorNumElements();
7324       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
7325 
7326       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
7327              "Invalid number of elements");
7328 
7329       SmallVector<SDValue, 8> Opnds;
7330       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
7331         Opnds.push_back(BuildVect.getOperand(i));
7332 
7333       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
7334     }
7335   }
7336 
7337   // See if we can simplify the input to this truncate through knowledge that
7338   // only the low bits are being used.
7339   // For example "trunc (or (shl x, 8), y)" // -> trunc y
7340   // Currently we only perform this optimization on scalars because vectors
7341   // may have different active low bits.
7342   if (!VT.isVector()) {
7343     if (SDValue Shorter =
7344             GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
7345                                                      VT.getSizeInBits())))
7346       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
7347   }
7348   // fold (truncate (load x)) -> (smaller load x)
7349   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
7350   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
7351     if (SDValue Reduced = ReduceLoadWidth(N))
7352       return Reduced;
7353 
7354     // Handle the case where the load remains an extending load even
7355     // after truncation.
7356     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
7357       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7358       if (!LN0->isVolatile() &&
7359           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
7360         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
7361                                          VT, LN0->getChain(), LN0->getBasePtr(),
7362                                          LN0->getMemoryVT(),
7363                                          LN0->getMemOperand());
7364         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
7365         return NewLoad;
7366       }
7367     }
7368   }
7369   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
7370   // where ... are all 'undef'.
7371   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
7372     SmallVector<EVT, 8> VTs;
7373     SDValue V;
7374     unsigned Idx = 0;
7375     unsigned NumDefs = 0;
7376 
7377     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
7378       SDValue X = N0.getOperand(i);
7379       if (!X.isUndef()) {
7380         V = X;
7381         Idx = i;
7382         NumDefs++;
7383       }
7384       // Stop if more than one members are non-undef.
7385       if (NumDefs > 1)
7386         break;
7387       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
7388                                      VT.getVectorElementType(),
7389                                      X.getValueType().getVectorNumElements()));
7390     }
7391 
7392     if (NumDefs == 0)
7393       return DAG.getUNDEF(VT);
7394 
7395     if (NumDefs == 1) {
7396       assert(V.getNode() && "The single defined operand is empty!");
7397       SmallVector<SDValue, 8> Opnds;
7398       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
7399         if (i != Idx) {
7400           Opnds.push_back(DAG.getUNDEF(VTs[i]));
7401           continue;
7402         }
7403         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
7404         AddToWorklist(NV.getNode());
7405         Opnds.push_back(NV);
7406       }
7407       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
7408     }
7409   }
7410 
7411   // Fold truncate of a bitcast of a vector to an extract of the low vector
7412   // element.
7413   //
7414   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
7415   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
7416     SDValue VecSrc = N0.getOperand(0);
7417     EVT SrcVT = VecSrc.getValueType();
7418     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
7419         (!LegalOperations ||
7420          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
7421       SDLoc SL(N);
7422 
7423       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
7424       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
7425                          VecSrc, DAG.getConstant(0, SL, IdxVT));
7426     }
7427   }
7428 
7429   // Simplify the operands using demanded-bits information.
7430   if (!VT.isVector() &&
7431       SimplifyDemandedBits(SDValue(N, 0)))
7432     return SDValue(N, 0);
7433 
7434   return SDValue();
7435 }
7436 
7437 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
7438   SDValue Elt = N->getOperand(i);
7439   if (Elt.getOpcode() != ISD::MERGE_VALUES)
7440     return Elt.getNode();
7441   return Elt.getOperand(Elt.getResNo()).getNode();
7442 }
7443 
7444 /// build_pair (load, load) -> load
7445 /// if load locations are consecutive.
7446 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
7447   assert(N->getOpcode() == ISD::BUILD_PAIR);
7448 
7449   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
7450   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
7451   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
7452       LD1->getAddressSpace() != LD2->getAddressSpace())
7453     return SDValue();
7454   EVT LD1VT = LD1->getValueType(0);
7455   unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
7456   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
7457       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
7458     unsigned Align = LD1->getAlignment();
7459     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
7460         VT.getTypeForEVT(*DAG.getContext()));
7461 
7462     if (NewAlign <= Align &&
7463         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
7464       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
7465                          LD1->getPointerInfo(), Align);
7466   }
7467 
7468   return SDValue();
7469 }
7470 
7471 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
7472   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
7473   // and Lo parts; on big-endian machines it doesn't.
7474   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
7475 }
7476 
7477 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
7478                                     const TargetLowering &TLI) {
7479   // If this is not a bitcast to an FP type or if the target doesn't have
7480   // IEEE754-compliant FP logic, we're done.
7481   EVT VT = N->getValueType(0);
7482   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
7483     return SDValue();
7484 
7485   // TODO: Use splat values for the constant-checking below and remove this
7486   // restriction.
7487   SDValue N0 = N->getOperand(0);
7488   EVT SourceVT = N0.getValueType();
7489   if (SourceVT.isVector())
7490     return SDValue();
7491 
7492   unsigned FPOpcode;
7493   APInt SignMask;
7494   switch (N0.getOpcode()) {
7495   case ISD::AND:
7496     FPOpcode = ISD::FABS;
7497     SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
7498     break;
7499   case ISD::XOR:
7500     FPOpcode = ISD::FNEG;
7501     SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
7502     break;
7503   // TODO: ISD::OR --> ISD::FNABS?
7504   default:
7505     return SDValue();
7506   }
7507 
7508   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
7509   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
7510   SDValue LogicOp0 = N0.getOperand(0);
7511   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7512   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
7513       LogicOp0.getOpcode() == ISD::BITCAST &&
7514       LogicOp0->getOperand(0).getValueType() == VT)
7515     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
7516 
7517   return SDValue();
7518 }
7519 
7520 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
7521   SDValue N0 = N->getOperand(0);
7522   EVT VT = N->getValueType(0);
7523 
7524   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
7525   // Only do this before legalize, since afterward the target may be depending
7526   // on the bitconvert.
7527   // First check to see if this is all constant.
7528   if (!LegalTypes &&
7529       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
7530       VT.isVector()) {
7531     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
7532 
7533     EVT DestEltVT = N->getValueType(0).getVectorElementType();
7534     assert(!DestEltVT.isVector() &&
7535            "Element type of vector ValueType must not be vector!");
7536     if (isSimple)
7537       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
7538   }
7539 
7540   // If the input is a constant, let getNode fold it.
7541   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
7542     // If we can't allow illegal operations, we need to check that this is just
7543     // a fp -> int or int -> conversion and that the resulting operation will
7544     // be legal.
7545     if (!LegalOperations ||
7546         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
7547          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
7548         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
7549          TLI.isOperationLegal(ISD::Constant, VT)))
7550       return DAG.getBitcast(VT, N0);
7551   }
7552 
7553   // (conv (conv x, t1), t2) -> (conv x, t2)
7554   if (N0.getOpcode() == ISD::BITCAST)
7555     return DAG.getBitcast(VT, N0.getOperand(0));
7556 
7557   // fold (conv (load x)) -> (load (conv*)x)
7558   // If the resultant load doesn't need a higher alignment than the original!
7559   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
7560       // Do not change the width of a volatile load.
7561       !cast<LoadSDNode>(N0)->isVolatile() &&
7562       // Do not remove the cast if the types differ in endian layout.
7563       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
7564           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
7565       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
7566       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
7567     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7568     unsigned OrigAlign = LN0->getAlignment();
7569 
7570     bool Fast = false;
7571     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
7572                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
7573         Fast) {
7574       SDValue Load =
7575           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
7576                       LN0->getPointerInfo(), OrigAlign,
7577                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7578       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
7579       return Load;
7580     }
7581   }
7582 
7583   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
7584     return V;
7585 
7586   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
7587   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
7588   //
7589   // For ppc_fp128:
7590   // fold (bitcast (fneg x)) ->
7591   //     flipbit = signbit
7592   //     (xor (bitcast x) (build_pair flipbit, flipbit))
7593   //
7594   // fold (bitcast (fabs x)) ->
7595   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
7596   //     (xor (bitcast x) (build_pair flipbit, flipbit))
7597   // This often reduces constant pool loads.
7598   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
7599        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
7600       N0.getNode()->hasOneUse() && VT.isInteger() &&
7601       !VT.isVector() && !N0.getValueType().isVector()) {
7602     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
7603     AddToWorklist(NewConv.getNode());
7604 
7605     SDLoc DL(N);
7606     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
7607       assert(VT.getSizeInBits() == 128);
7608       SDValue SignBit = DAG.getConstant(
7609           APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
7610       SDValue FlipBit;
7611       if (N0.getOpcode() == ISD::FNEG) {
7612         FlipBit = SignBit;
7613         AddToWorklist(FlipBit.getNode());
7614       } else {
7615         assert(N0.getOpcode() == ISD::FABS);
7616         SDValue Hi =
7617             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
7618                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
7619                                               SDLoc(NewConv)));
7620         AddToWorklist(Hi.getNode());
7621         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
7622         AddToWorklist(FlipBit.getNode());
7623       }
7624       SDValue FlipBits =
7625           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
7626       AddToWorklist(FlipBits.getNode());
7627       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
7628     }
7629     APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
7630     if (N0.getOpcode() == ISD::FNEG)
7631       return DAG.getNode(ISD::XOR, DL, VT,
7632                          NewConv, DAG.getConstant(SignBit, DL, VT));
7633     assert(N0.getOpcode() == ISD::FABS);
7634     return DAG.getNode(ISD::AND, DL, VT,
7635                        NewConv, DAG.getConstant(~SignBit, DL, VT));
7636   }
7637 
7638   // fold (bitconvert (fcopysign cst, x)) ->
7639   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
7640   // Note that we don't handle (copysign x, cst) because this can always be
7641   // folded to an fneg or fabs.
7642   //
7643   // For ppc_fp128:
7644   // fold (bitcast (fcopysign cst, x)) ->
7645   //     flipbit = (and (extract_element
7646   //                     (xor (bitcast cst), (bitcast x)), 0),
7647   //                    signbit)
7648   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
7649   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
7650       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
7651       VT.isInteger() && !VT.isVector()) {
7652     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
7653     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
7654     if (isTypeLegal(IntXVT)) {
7655       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
7656       AddToWorklist(X.getNode());
7657 
7658       // If X has a different width than the result/lhs, sext it or truncate it.
7659       unsigned VTWidth = VT.getSizeInBits();
7660       if (OrigXWidth < VTWidth) {
7661         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
7662         AddToWorklist(X.getNode());
7663       } else if (OrigXWidth > VTWidth) {
7664         // To get the sign bit in the right place, we have to shift it right
7665         // before truncating.
7666         SDLoc DL(X);
7667         X = DAG.getNode(ISD::SRL, DL,
7668                         X.getValueType(), X,
7669                         DAG.getConstant(OrigXWidth-VTWidth, DL,
7670                                         X.getValueType()));
7671         AddToWorklist(X.getNode());
7672         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7673         AddToWorklist(X.getNode());
7674       }
7675 
7676       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
7677         APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
7678         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
7679         AddToWorklist(Cst.getNode());
7680         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
7681         AddToWorklist(X.getNode());
7682         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
7683         AddToWorklist(XorResult.getNode());
7684         SDValue XorResult64 = DAG.getNode(
7685             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
7686             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
7687                                   SDLoc(XorResult)));
7688         AddToWorklist(XorResult64.getNode());
7689         SDValue FlipBit =
7690             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
7691                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
7692         AddToWorklist(FlipBit.getNode());
7693         SDValue FlipBits =
7694             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
7695         AddToWorklist(FlipBits.getNode());
7696         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
7697       }
7698       APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
7699       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
7700                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
7701       AddToWorklist(X.getNode());
7702 
7703       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
7704       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
7705                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
7706       AddToWorklist(Cst.getNode());
7707 
7708       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
7709     }
7710   }
7711 
7712   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
7713   if (N0.getOpcode() == ISD::BUILD_PAIR)
7714     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
7715       return CombineLD;
7716 
7717   // Remove double bitcasts from shuffles - this is often a legacy of
7718   // XformToShuffleWithZero being used to combine bitmaskings (of
7719   // float vectors bitcast to integer vectors) into shuffles.
7720   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
7721   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
7722       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
7723       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
7724       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
7725     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
7726 
7727     // If operands are a bitcast, peek through if it casts the original VT.
7728     // If operands are a constant, just bitcast back to original VT.
7729     auto PeekThroughBitcast = [&](SDValue Op) {
7730       if (Op.getOpcode() == ISD::BITCAST &&
7731           Op.getOperand(0).getValueType() == VT)
7732         return SDValue(Op.getOperand(0));
7733       if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
7734           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
7735         return DAG.getBitcast(VT, Op);
7736       return SDValue();
7737     };
7738 
7739     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
7740     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
7741     if (!(SV0 && SV1))
7742       return SDValue();
7743 
7744     int MaskScale =
7745         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
7746     SmallVector<int, 8> NewMask;
7747     for (int M : SVN->getMask())
7748       for (int i = 0; i != MaskScale; ++i)
7749         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
7750 
7751     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
7752     if (!LegalMask) {
7753       std::swap(SV0, SV1);
7754       ShuffleVectorSDNode::commuteMask(NewMask);
7755       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
7756     }
7757 
7758     if (LegalMask)
7759       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
7760   }
7761 
7762   return SDValue();
7763 }
7764 
7765 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
7766   EVT VT = N->getValueType(0);
7767   return CombineConsecutiveLoads(N, VT);
7768 }
7769 
7770 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
7771 /// operands. DstEltVT indicates the destination element value type.
7772 SDValue DAGCombiner::
7773 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
7774   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
7775 
7776   // If this is already the right type, we're done.
7777   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
7778 
7779   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
7780   unsigned DstBitSize = DstEltVT.getSizeInBits();
7781 
7782   // If this is a conversion of N elements of one type to N elements of another
7783   // type, convert each element.  This handles FP<->INT cases.
7784   if (SrcBitSize == DstBitSize) {
7785     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
7786                               BV->getValueType(0).getVectorNumElements());
7787 
7788     // Due to the FP element handling below calling this routine recursively,
7789     // we can end up with a scalar-to-vector node here.
7790     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
7791       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
7792                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
7793 
7794     SmallVector<SDValue, 8> Ops;
7795     for (SDValue Op : BV->op_values()) {
7796       // If the vector element type is not legal, the BUILD_VECTOR operands
7797       // are promoted and implicitly truncated.  Make that explicit here.
7798       if (Op.getValueType() != SrcEltVT)
7799         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
7800       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
7801       AddToWorklist(Ops.back().getNode());
7802     }
7803     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
7804   }
7805 
7806   // Otherwise, we're growing or shrinking the elements.  To avoid having to
7807   // handle annoying details of growing/shrinking FP values, we convert them to
7808   // int first.
7809   if (SrcEltVT.isFloatingPoint()) {
7810     // Convert the input float vector to a int vector where the elements are the
7811     // same sizes.
7812     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
7813     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
7814     SrcEltVT = IntVT;
7815   }
7816 
7817   // Now we know the input is an integer vector.  If the output is a FP type,
7818   // convert to integer first, then to FP of the right size.
7819   if (DstEltVT.isFloatingPoint()) {
7820     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
7821     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
7822 
7823     // Next, convert to FP elements of the same size.
7824     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
7825   }
7826 
7827   SDLoc DL(BV);
7828 
7829   // Okay, we know the src/dst types are both integers of differing types.
7830   // Handling growing first.
7831   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
7832   if (SrcBitSize < DstBitSize) {
7833     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
7834 
7835     SmallVector<SDValue, 8> Ops;
7836     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
7837          i += NumInputsPerOutput) {
7838       bool isLE = DAG.getDataLayout().isLittleEndian();
7839       APInt NewBits = APInt(DstBitSize, 0);
7840       bool EltIsUndef = true;
7841       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
7842         // Shift the previously computed bits over.
7843         NewBits <<= SrcBitSize;
7844         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
7845         if (Op.isUndef()) continue;
7846         EltIsUndef = false;
7847 
7848         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
7849                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
7850       }
7851 
7852       if (EltIsUndef)
7853         Ops.push_back(DAG.getUNDEF(DstEltVT));
7854       else
7855         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
7856     }
7857 
7858     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
7859     return DAG.getBuildVector(VT, DL, Ops);
7860   }
7861 
7862   // Finally, this must be the case where we are shrinking elements: each input
7863   // turns into multiple outputs.
7864   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
7865   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
7866                             NumOutputsPerInput*BV->getNumOperands());
7867   SmallVector<SDValue, 8> Ops;
7868 
7869   for (const SDValue &Op : BV->op_values()) {
7870     if (Op.isUndef()) {
7871       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
7872       continue;
7873     }
7874 
7875     APInt OpVal = cast<ConstantSDNode>(Op)->
7876                   getAPIntValue().zextOrTrunc(SrcBitSize);
7877 
7878     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
7879       APInt ThisVal = OpVal.trunc(DstBitSize);
7880       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
7881       OpVal = OpVal.lshr(DstBitSize);
7882     }
7883 
7884     // For big endian targets, swap the order of the pieces of each element.
7885     if (DAG.getDataLayout().isBigEndian())
7886       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
7887   }
7888 
7889   return DAG.getBuildVector(VT, DL, Ops);
7890 }
7891 
7892 /// Try to perform FMA combining on a given FADD node.
7893 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
7894   SDValue N0 = N->getOperand(0);
7895   SDValue N1 = N->getOperand(1);
7896   EVT VT = N->getValueType(0);
7897   SDLoc SL(N);
7898 
7899   const TargetOptions &Options = DAG.getTarget().Options;
7900   bool AllowFusion =
7901       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
7902 
7903   // Floating-point multiply-add with intermediate rounding.
7904   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
7905 
7906   // Floating-point multiply-add without intermediate rounding.
7907   bool HasFMA =
7908       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
7909       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
7910 
7911   // No valid opcode, do not combine.
7912   if (!HasFMAD && !HasFMA)
7913     return SDValue();
7914 
7915   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
7916   ;
7917   if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
7918     return SDValue();
7919 
7920   // Always prefer FMAD to FMA for precision.
7921   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
7922   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
7923   bool LookThroughFPExt = TLI.isFPExtFree(VT);
7924 
7925   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
7926   // prefer to fold the multiply with fewer uses.
7927   if (Aggressive && N0.getOpcode() == ISD::FMUL &&
7928       N1.getOpcode() == ISD::FMUL) {
7929     if (N0.getNode()->use_size() > N1.getNode()->use_size())
7930       std::swap(N0, N1);
7931   }
7932 
7933   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
7934   if (N0.getOpcode() == ISD::FMUL &&
7935       (Aggressive || N0->hasOneUse())) {
7936     return DAG.getNode(PreferredFusedOpcode, SL, VT,
7937                        N0.getOperand(0), N0.getOperand(1), N1);
7938   }
7939 
7940   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
7941   // Note: Commutes FADD operands.
7942   if (N1.getOpcode() == ISD::FMUL &&
7943       (Aggressive || N1->hasOneUse())) {
7944     return DAG.getNode(PreferredFusedOpcode, SL, VT,
7945                        N1.getOperand(0), N1.getOperand(1), N0);
7946   }
7947 
7948   // Look through FP_EXTEND nodes to do more combining.
7949   if (AllowFusion && LookThroughFPExt) {
7950     // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
7951     if (N0.getOpcode() == ISD::FP_EXTEND) {
7952       SDValue N00 = N0.getOperand(0);
7953       if (N00.getOpcode() == ISD::FMUL)
7954         return DAG.getNode(PreferredFusedOpcode, SL, VT,
7955                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7956                                        N00.getOperand(0)),
7957                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7958                                        N00.getOperand(1)), N1);
7959     }
7960 
7961     // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
7962     // Note: Commutes FADD operands.
7963     if (N1.getOpcode() == ISD::FP_EXTEND) {
7964       SDValue N10 = N1.getOperand(0);
7965       if (N10.getOpcode() == ISD::FMUL)
7966         return DAG.getNode(PreferredFusedOpcode, SL, VT,
7967                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7968                                        N10.getOperand(0)),
7969                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7970                                        N10.getOperand(1)), N0);
7971     }
7972   }
7973 
7974   // More folding opportunities when target permits.
7975   if ((AllowFusion || HasFMAD)  && Aggressive) {
7976     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
7977     if (N0.getOpcode() == PreferredFusedOpcode &&
7978         N0.getOperand(2).getOpcode() == ISD::FMUL) {
7979       return DAG.getNode(PreferredFusedOpcode, SL, VT,
7980                          N0.getOperand(0), N0.getOperand(1),
7981                          DAG.getNode(PreferredFusedOpcode, SL, VT,
7982                                      N0.getOperand(2).getOperand(0),
7983                                      N0.getOperand(2).getOperand(1),
7984                                      N1));
7985     }
7986 
7987     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
7988     if (N1->getOpcode() == PreferredFusedOpcode &&
7989         N1.getOperand(2).getOpcode() == ISD::FMUL) {
7990       return DAG.getNode(PreferredFusedOpcode, SL, VT,
7991                          N1.getOperand(0), N1.getOperand(1),
7992                          DAG.getNode(PreferredFusedOpcode, SL, VT,
7993                                      N1.getOperand(2).getOperand(0),
7994                                      N1.getOperand(2).getOperand(1),
7995                                      N0));
7996     }
7997 
7998     if (AllowFusion && LookThroughFPExt) {
7999       // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
8000       //   -> (fma x, y, (fma (fpext u), (fpext v), z))
8001       auto FoldFAddFMAFPExtFMul = [&] (
8002           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
8003         return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
8004                            DAG.getNode(PreferredFusedOpcode, SL, VT,
8005                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
8006                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
8007                                        Z));
8008       };
8009       if (N0.getOpcode() == PreferredFusedOpcode) {
8010         SDValue N02 = N0.getOperand(2);
8011         if (N02.getOpcode() == ISD::FP_EXTEND) {
8012           SDValue N020 = N02.getOperand(0);
8013           if (N020.getOpcode() == ISD::FMUL)
8014             return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
8015                                         N020.getOperand(0), N020.getOperand(1),
8016                                         N1);
8017         }
8018       }
8019 
8020       // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
8021       //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
8022       // FIXME: This turns two single-precision and one double-precision
8023       // operation into two double-precision operations, which might not be
8024       // interesting for all targets, especially GPUs.
8025       auto FoldFAddFPExtFMAFMul = [&] (
8026           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
8027         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8028                            DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
8029                            DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
8030                            DAG.getNode(PreferredFusedOpcode, SL, VT,
8031                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
8032                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
8033                                        Z));
8034       };
8035       if (N0.getOpcode() == ISD::FP_EXTEND) {
8036         SDValue N00 = N0.getOperand(0);
8037         if (N00.getOpcode() == PreferredFusedOpcode) {
8038           SDValue N002 = N00.getOperand(2);
8039           if (N002.getOpcode() == ISD::FMUL)
8040             return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
8041                                         N002.getOperand(0), N002.getOperand(1),
8042                                         N1);
8043         }
8044       }
8045 
8046       // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
8047       //   -> (fma y, z, (fma (fpext u), (fpext v), x))
8048       if (N1.getOpcode() == PreferredFusedOpcode) {
8049         SDValue N12 = N1.getOperand(2);
8050         if (N12.getOpcode() == ISD::FP_EXTEND) {
8051           SDValue N120 = N12.getOperand(0);
8052           if (N120.getOpcode() == ISD::FMUL)
8053             return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
8054                                         N120.getOperand(0), N120.getOperand(1),
8055                                         N0);
8056         }
8057       }
8058 
8059       // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
8060       //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
8061       // FIXME: This turns two single-precision and one double-precision
8062       // operation into two double-precision operations, which might not be
8063       // interesting for all targets, especially GPUs.
8064       if (N1.getOpcode() == ISD::FP_EXTEND) {
8065         SDValue N10 = N1.getOperand(0);
8066         if (N10.getOpcode() == PreferredFusedOpcode) {
8067           SDValue N102 = N10.getOperand(2);
8068           if (N102.getOpcode() == ISD::FMUL)
8069             return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
8070                                         N102.getOperand(0), N102.getOperand(1),
8071                                         N0);
8072         }
8073       }
8074     }
8075   }
8076 
8077   return SDValue();
8078 }
8079 
8080 /// Try to perform FMA combining on a given FSUB node.
8081 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
8082   SDValue N0 = N->getOperand(0);
8083   SDValue N1 = N->getOperand(1);
8084   EVT VT = N->getValueType(0);
8085   SDLoc SL(N);
8086 
8087   const TargetOptions &Options = DAG.getTarget().Options;
8088   bool AllowFusion =
8089       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
8090 
8091   // Floating-point multiply-add with intermediate rounding.
8092   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8093 
8094   // Floating-point multiply-add without intermediate rounding.
8095   bool HasFMA =
8096       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8097       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8098 
8099   // No valid opcode, do not combine.
8100   if (!HasFMAD && !HasFMA)
8101     return SDValue();
8102 
8103   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
8104   if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
8105     return SDValue();
8106 
8107   // Always prefer FMAD to FMA for precision.
8108   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8109   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8110   bool LookThroughFPExt = TLI.isFPExtFree(VT);
8111 
8112   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
8113   if (N0.getOpcode() == ISD::FMUL &&
8114       (Aggressive || N0->hasOneUse())) {
8115     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8116                        N0.getOperand(0), N0.getOperand(1),
8117                        DAG.getNode(ISD::FNEG, SL, VT, N1));
8118   }
8119 
8120   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
8121   // Note: Commutes FSUB operands.
8122   if (N1.getOpcode() == ISD::FMUL &&
8123       (Aggressive || N1->hasOneUse()))
8124     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8125                        DAG.getNode(ISD::FNEG, SL, VT,
8126                                    N1.getOperand(0)),
8127                        N1.getOperand(1), N0);
8128 
8129   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
8130   if (N0.getOpcode() == ISD::FNEG &&
8131       N0.getOperand(0).getOpcode() == ISD::FMUL &&
8132       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
8133     SDValue N00 = N0.getOperand(0).getOperand(0);
8134     SDValue N01 = N0.getOperand(0).getOperand(1);
8135     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8136                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
8137                        DAG.getNode(ISD::FNEG, SL, VT, N1));
8138   }
8139 
8140   // Look through FP_EXTEND nodes to do more combining.
8141   if (AllowFusion && LookThroughFPExt) {
8142     // fold (fsub (fpext (fmul x, y)), z)
8143     //   -> (fma (fpext x), (fpext y), (fneg z))
8144     if (N0.getOpcode() == ISD::FP_EXTEND) {
8145       SDValue N00 = N0.getOperand(0);
8146       if (N00.getOpcode() == ISD::FMUL)
8147         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8148                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8149                                        N00.getOperand(0)),
8150                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8151                                        N00.getOperand(1)),
8152                            DAG.getNode(ISD::FNEG, SL, VT, N1));
8153     }
8154 
8155     // fold (fsub x, (fpext (fmul y, z)))
8156     //   -> (fma (fneg (fpext y)), (fpext z), x)
8157     // Note: Commutes FSUB operands.
8158     if (N1.getOpcode() == ISD::FP_EXTEND) {
8159       SDValue N10 = N1.getOperand(0);
8160       if (N10.getOpcode() == ISD::FMUL)
8161         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8162                            DAG.getNode(ISD::FNEG, SL, VT,
8163                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
8164                                                    N10.getOperand(0))),
8165                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8166                                        N10.getOperand(1)),
8167                            N0);
8168     }
8169 
8170     // fold (fsub (fpext (fneg (fmul, x, y))), z)
8171     //   -> (fneg (fma (fpext x), (fpext y), z))
8172     // Note: This could be removed with appropriate canonicalization of the
8173     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
8174     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
8175     // from implementing the canonicalization in visitFSUB.
8176     if (N0.getOpcode() == ISD::FP_EXTEND) {
8177       SDValue N00 = N0.getOperand(0);
8178       if (N00.getOpcode() == ISD::FNEG) {
8179         SDValue N000 = N00.getOperand(0);
8180         if (N000.getOpcode() == ISD::FMUL) {
8181           return DAG.getNode(ISD::FNEG, SL, VT,
8182                              DAG.getNode(PreferredFusedOpcode, SL, VT,
8183                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8184                                                      N000.getOperand(0)),
8185                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8186                                                      N000.getOperand(1)),
8187                                          N1));
8188         }
8189       }
8190     }
8191 
8192     // fold (fsub (fneg (fpext (fmul, x, y))), z)
8193     //   -> (fneg (fma (fpext x)), (fpext y), z)
8194     // Note: This could be removed with appropriate canonicalization of the
8195     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
8196     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
8197     // from implementing the canonicalization in visitFSUB.
8198     if (N0.getOpcode() == ISD::FNEG) {
8199       SDValue N00 = N0.getOperand(0);
8200       if (N00.getOpcode() == ISD::FP_EXTEND) {
8201         SDValue N000 = N00.getOperand(0);
8202         if (N000.getOpcode() == ISD::FMUL) {
8203           return DAG.getNode(ISD::FNEG, SL, VT,
8204                              DAG.getNode(PreferredFusedOpcode, SL, VT,
8205                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8206                                                      N000.getOperand(0)),
8207                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8208                                                      N000.getOperand(1)),
8209                                          N1));
8210         }
8211       }
8212     }
8213 
8214   }
8215 
8216   // More folding opportunities when target permits.
8217   if ((AllowFusion || HasFMAD) && Aggressive) {
8218     // fold (fsub (fma x, y, (fmul u, v)), z)
8219     //   -> (fma x, y (fma u, v, (fneg z)))
8220     if (N0.getOpcode() == PreferredFusedOpcode &&
8221         N0.getOperand(2).getOpcode() == ISD::FMUL) {
8222       return DAG.getNode(PreferredFusedOpcode, SL, VT,
8223                          N0.getOperand(0), N0.getOperand(1),
8224                          DAG.getNode(PreferredFusedOpcode, SL, VT,
8225                                      N0.getOperand(2).getOperand(0),
8226                                      N0.getOperand(2).getOperand(1),
8227                                      DAG.getNode(ISD::FNEG, SL, VT,
8228                                                  N1)));
8229     }
8230 
8231     // fold (fsub x, (fma y, z, (fmul u, v)))
8232     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
8233     if (N1.getOpcode() == PreferredFusedOpcode &&
8234         N1.getOperand(2).getOpcode() == ISD::FMUL) {
8235       SDValue N20 = N1.getOperand(2).getOperand(0);
8236       SDValue N21 = N1.getOperand(2).getOperand(1);
8237       return DAG.getNode(PreferredFusedOpcode, SL, VT,
8238                          DAG.getNode(ISD::FNEG, SL, VT,
8239                                      N1.getOperand(0)),
8240                          N1.getOperand(1),
8241                          DAG.getNode(PreferredFusedOpcode, SL, VT,
8242                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
8243 
8244                                      N21, N0));
8245     }
8246 
8247     if (AllowFusion && LookThroughFPExt) {
8248       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
8249       //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
8250       if (N0.getOpcode() == PreferredFusedOpcode) {
8251         SDValue N02 = N0.getOperand(2);
8252         if (N02.getOpcode() == ISD::FP_EXTEND) {
8253           SDValue N020 = N02.getOperand(0);
8254           if (N020.getOpcode() == ISD::FMUL)
8255             return DAG.getNode(PreferredFusedOpcode, SL, VT,
8256                                N0.getOperand(0), N0.getOperand(1),
8257                                DAG.getNode(PreferredFusedOpcode, SL, VT,
8258                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8259                                                        N020.getOperand(0)),
8260                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8261                                                        N020.getOperand(1)),
8262                                            DAG.getNode(ISD::FNEG, SL, VT,
8263                                                        N1)));
8264         }
8265       }
8266 
8267       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
8268       //   -> (fma (fpext x), (fpext y),
8269       //           (fma (fpext u), (fpext v), (fneg z)))
8270       // FIXME: This turns two single-precision and one double-precision
8271       // operation into two double-precision operations, which might not be
8272       // interesting for all targets, especially GPUs.
8273       if (N0.getOpcode() == ISD::FP_EXTEND) {
8274         SDValue N00 = N0.getOperand(0);
8275         if (N00.getOpcode() == PreferredFusedOpcode) {
8276           SDValue N002 = N00.getOperand(2);
8277           if (N002.getOpcode() == ISD::FMUL)
8278             return DAG.getNode(PreferredFusedOpcode, SL, VT,
8279                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
8280                                            N00.getOperand(0)),
8281                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
8282                                            N00.getOperand(1)),
8283                                DAG.getNode(PreferredFusedOpcode, SL, VT,
8284                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8285                                                        N002.getOperand(0)),
8286                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8287                                                        N002.getOperand(1)),
8288                                            DAG.getNode(ISD::FNEG, SL, VT,
8289                                                        N1)));
8290         }
8291       }
8292 
8293       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
8294       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
8295       if (N1.getOpcode() == PreferredFusedOpcode &&
8296         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
8297         SDValue N120 = N1.getOperand(2).getOperand(0);
8298         if (N120.getOpcode() == ISD::FMUL) {
8299           SDValue N1200 = N120.getOperand(0);
8300           SDValue N1201 = N120.getOperand(1);
8301           return DAG.getNode(PreferredFusedOpcode, SL, VT,
8302                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
8303                              N1.getOperand(1),
8304                              DAG.getNode(PreferredFusedOpcode, SL, VT,
8305                                          DAG.getNode(ISD::FNEG, SL, VT,
8306                                              DAG.getNode(ISD::FP_EXTEND, SL,
8307                                                          VT, N1200)),
8308                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8309                                                      N1201),
8310                                          N0));
8311         }
8312       }
8313 
8314       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
8315       //   -> (fma (fneg (fpext y)), (fpext z),
8316       //           (fma (fneg (fpext u)), (fpext v), x))
8317       // FIXME: This turns two single-precision and one double-precision
8318       // operation into two double-precision operations, which might not be
8319       // interesting for all targets, especially GPUs.
8320       if (N1.getOpcode() == ISD::FP_EXTEND &&
8321         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
8322         SDValue N100 = N1.getOperand(0).getOperand(0);
8323         SDValue N101 = N1.getOperand(0).getOperand(1);
8324         SDValue N102 = N1.getOperand(0).getOperand(2);
8325         if (N102.getOpcode() == ISD::FMUL) {
8326           SDValue N1020 = N102.getOperand(0);
8327           SDValue N1021 = N102.getOperand(1);
8328           return DAG.getNode(PreferredFusedOpcode, SL, VT,
8329                              DAG.getNode(ISD::FNEG, SL, VT,
8330                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8331                                                      N100)),
8332                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
8333                              DAG.getNode(PreferredFusedOpcode, SL, VT,
8334                                          DAG.getNode(ISD::FNEG, SL, VT,
8335                                              DAG.getNode(ISD::FP_EXTEND, SL,
8336                                                          VT, N1020)),
8337                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8338                                                      N1021),
8339                                          N0));
8340         }
8341       }
8342     }
8343   }
8344 
8345   return SDValue();
8346 }
8347 
8348 /// Try to perform FMA combining on a given FMUL node.
8349 SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
8350   SDValue N0 = N->getOperand(0);
8351   SDValue N1 = N->getOperand(1);
8352   EVT VT = N->getValueType(0);
8353   SDLoc SL(N);
8354 
8355   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
8356 
8357   const TargetOptions &Options = DAG.getTarget().Options;
8358   bool AllowFusion =
8359       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
8360 
8361   // Floating-point multiply-add with intermediate rounding.
8362   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8363 
8364   // Floating-point multiply-add without intermediate rounding.
8365   bool HasFMA =
8366       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8367       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8368 
8369   // No valid opcode, do not combine.
8370   if (!HasFMAD && !HasFMA)
8371     return SDValue();
8372 
8373   // Always prefer FMAD to FMA for precision.
8374   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8375   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8376 
8377   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
8378   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
8379   auto FuseFADD = [&](SDValue X, SDValue Y) {
8380     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
8381       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
8382       if (XC1 && XC1->isExactlyValue(+1.0))
8383         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
8384       if (XC1 && XC1->isExactlyValue(-1.0))
8385         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
8386                            DAG.getNode(ISD::FNEG, SL, VT, Y));
8387     }
8388     return SDValue();
8389   };
8390 
8391   if (SDValue FMA = FuseFADD(N0, N1))
8392     return FMA;
8393   if (SDValue FMA = FuseFADD(N1, N0))
8394     return FMA;
8395 
8396   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
8397   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
8398   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
8399   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
8400   auto FuseFSUB = [&](SDValue X, SDValue Y) {
8401     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
8402       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
8403       if (XC0 && XC0->isExactlyValue(+1.0))
8404         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8405                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
8406                            Y);
8407       if (XC0 && XC0->isExactlyValue(-1.0))
8408         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8409                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
8410                            DAG.getNode(ISD::FNEG, SL, VT, Y));
8411 
8412       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
8413       if (XC1 && XC1->isExactlyValue(+1.0))
8414         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
8415                            DAG.getNode(ISD::FNEG, SL, VT, Y));
8416       if (XC1 && XC1->isExactlyValue(-1.0))
8417         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
8418     }
8419     return SDValue();
8420   };
8421 
8422   if (SDValue FMA = FuseFSUB(N0, N1))
8423     return FMA;
8424   if (SDValue FMA = FuseFSUB(N1, N0))
8425     return FMA;
8426 
8427   return SDValue();
8428 }
8429 
8430 SDValue DAGCombiner::visitFADD(SDNode *N) {
8431   SDValue N0 = N->getOperand(0);
8432   SDValue N1 = N->getOperand(1);
8433   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
8434   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
8435   EVT VT = N->getValueType(0);
8436   SDLoc DL(N);
8437   const TargetOptions &Options = DAG.getTarget().Options;
8438   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8439 
8440   // fold vector ops
8441   if (VT.isVector())
8442     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8443       return FoldedVOp;
8444 
8445   // fold (fadd c1, c2) -> c1 + c2
8446   if (N0CFP && N1CFP)
8447     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
8448 
8449   // canonicalize constant to RHS
8450   if (N0CFP && !N1CFP)
8451     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
8452 
8453   // fold (fadd A, (fneg B)) -> (fsub A, B)
8454   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
8455       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
8456     return DAG.getNode(ISD::FSUB, DL, VT, N0,
8457                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
8458 
8459   // fold (fadd (fneg A), B) -> (fsub B, A)
8460   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
8461       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
8462     return DAG.getNode(ISD::FSUB, DL, VT, N1,
8463                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
8464 
8465   // If 'unsafe math' is enabled, fold lots of things.
8466   if (Options.UnsafeFPMath) {
8467     // No FP constant should be created after legalization as Instruction
8468     // Selection pass has a hard time dealing with FP constants.
8469     bool AllowNewConst = (Level < AfterLegalizeDAG);
8470 
8471     // fold (fadd A, 0) -> A
8472     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
8473       if (N1C->isZero())
8474         return N0;
8475 
8476     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
8477     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
8478         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
8479       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
8480                          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
8481                                      Flags),
8482                          Flags);
8483 
8484     // If allowed, fold (fadd (fneg x), x) -> 0.0
8485     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
8486       return DAG.getConstantFP(0.0, DL, VT);
8487 
8488     // If allowed, fold (fadd x, (fneg x)) -> 0.0
8489     if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
8490       return DAG.getConstantFP(0.0, DL, VT);
8491 
8492     // We can fold chains of FADD's of the same value into multiplications.
8493     // This transform is not safe in general because we are reducing the number
8494     // of rounding steps.
8495     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
8496       if (N0.getOpcode() == ISD::FMUL) {
8497         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
8498         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
8499 
8500         // (fadd (fmul x, c), x) -> (fmul x, c+1)
8501         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
8502           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
8503                                        DAG.getConstantFP(1.0, DL, VT), Flags);
8504           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
8505         }
8506 
8507         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
8508         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
8509             N1.getOperand(0) == N1.getOperand(1) &&
8510             N0.getOperand(0) == N1.getOperand(0)) {
8511           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
8512                                        DAG.getConstantFP(2.0, DL, VT), Flags);
8513           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
8514         }
8515       }
8516 
8517       if (N1.getOpcode() == ISD::FMUL) {
8518         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
8519         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
8520 
8521         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
8522         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
8523           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
8524                                        DAG.getConstantFP(1.0, DL, VT), Flags);
8525           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
8526         }
8527 
8528         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
8529         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
8530             N0.getOperand(0) == N0.getOperand(1) &&
8531             N1.getOperand(0) == N0.getOperand(0)) {
8532           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
8533                                        DAG.getConstantFP(2.0, DL, VT), Flags);
8534           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
8535         }
8536       }
8537 
8538       if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
8539         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
8540         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
8541         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
8542             (N0.getOperand(0) == N1)) {
8543           return DAG.getNode(ISD::FMUL, DL, VT,
8544                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
8545         }
8546       }
8547 
8548       if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
8549         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
8550         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
8551         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
8552             N1.getOperand(0) == N0) {
8553           return DAG.getNode(ISD::FMUL, DL, VT,
8554                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
8555         }
8556       }
8557 
8558       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
8559       if (AllowNewConst &&
8560           N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
8561           N0.getOperand(0) == N0.getOperand(1) &&
8562           N1.getOperand(0) == N1.getOperand(1) &&
8563           N0.getOperand(0) == N1.getOperand(0)) {
8564         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
8565                            DAG.getConstantFP(4.0, DL, VT), Flags);
8566       }
8567     }
8568   } // enable-unsafe-fp-math
8569 
8570   // FADD -> FMA combines:
8571   if (SDValue Fused = visitFADDForFMACombine(N)) {
8572     AddToWorklist(Fused.getNode());
8573     return Fused;
8574   }
8575   return SDValue();
8576 }
8577 
8578 SDValue DAGCombiner::visitFSUB(SDNode *N) {
8579   SDValue N0 = N->getOperand(0);
8580   SDValue N1 = N->getOperand(1);
8581   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
8582   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
8583   EVT VT = N->getValueType(0);
8584   SDLoc DL(N);
8585   const TargetOptions &Options = DAG.getTarget().Options;
8586   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8587 
8588   // fold vector ops
8589   if (VT.isVector())
8590     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8591       return FoldedVOp;
8592 
8593   // fold (fsub c1, c2) -> c1-c2
8594   if (N0CFP && N1CFP)
8595     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
8596 
8597   // fold (fsub A, (fneg B)) -> (fadd A, B)
8598   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
8599     return DAG.getNode(ISD::FADD, DL, VT, N0,
8600                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
8601 
8602   // If 'unsafe math' is enabled, fold lots of things.
8603   if (Options.UnsafeFPMath) {
8604     // (fsub A, 0) -> A
8605     if (N1CFP && N1CFP->isZero())
8606       return N0;
8607 
8608     // (fsub 0, B) -> -B
8609     if (N0CFP && N0CFP->isZero()) {
8610       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
8611         return GetNegatedExpression(N1, DAG, LegalOperations);
8612       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
8613         return DAG.getNode(ISD::FNEG, DL, VT, N1);
8614     }
8615 
8616     // (fsub x, x) -> 0.0
8617     if (N0 == N1)
8618       return DAG.getConstantFP(0.0f, DL, VT);
8619 
8620     // (fsub x, (fadd x, y)) -> (fneg y)
8621     // (fsub x, (fadd y, x)) -> (fneg y)
8622     if (N1.getOpcode() == ISD::FADD) {
8623       SDValue N10 = N1->getOperand(0);
8624       SDValue N11 = N1->getOperand(1);
8625 
8626       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
8627         return GetNegatedExpression(N11, DAG, LegalOperations);
8628 
8629       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
8630         return GetNegatedExpression(N10, DAG, LegalOperations);
8631     }
8632   }
8633 
8634   // FSUB -> FMA combines:
8635   if (SDValue Fused = visitFSUBForFMACombine(N)) {
8636     AddToWorklist(Fused.getNode());
8637     return Fused;
8638   }
8639 
8640   return SDValue();
8641 }
8642 
8643 SDValue DAGCombiner::visitFMUL(SDNode *N) {
8644   SDValue N0 = N->getOperand(0);
8645   SDValue N1 = N->getOperand(1);
8646   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
8647   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
8648   EVT VT = N->getValueType(0);
8649   SDLoc DL(N);
8650   const TargetOptions &Options = DAG.getTarget().Options;
8651   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8652 
8653   // fold vector ops
8654   if (VT.isVector()) {
8655     // This just handles C1 * C2 for vectors. Other vector folds are below.
8656     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8657       return FoldedVOp;
8658   }
8659 
8660   // fold (fmul c1, c2) -> c1*c2
8661   if (N0CFP && N1CFP)
8662     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
8663 
8664   // canonicalize constant to RHS
8665   if (isConstantFPBuildVectorOrConstantFP(N0) &&
8666      !isConstantFPBuildVectorOrConstantFP(N1))
8667     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
8668 
8669   // fold (fmul A, 1.0) -> A
8670   if (N1CFP && N1CFP->isExactlyValue(1.0))
8671     return N0;
8672 
8673   if (Options.UnsafeFPMath) {
8674     // fold (fmul A, 0) -> 0
8675     if (N1CFP && N1CFP->isZero())
8676       return N1;
8677 
8678     // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
8679     if (N0.getOpcode() == ISD::FMUL) {
8680       // Fold scalars or any vector constants (not just splats).
8681       // This fold is done in general by InstCombine, but extra fmul insts
8682       // may have been generated during lowering.
8683       SDValue N00 = N0.getOperand(0);
8684       SDValue N01 = N0.getOperand(1);
8685       auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
8686       auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
8687       auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
8688 
8689       // Check 1: Make sure that the first operand of the inner multiply is NOT
8690       // a constant. Otherwise, we may induce infinite looping.
8691       if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
8692         // Check 2: Make sure that the second operand of the inner multiply and
8693         // the second operand of the outer multiply are constants.
8694         if ((N1CFP && isConstOrConstSplatFP(N01)) ||
8695             (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
8696           SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
8697           return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
8698         }
8699       }
8700     }
8701 
8702     // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
8703     // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
8704     // during an early run of DAGCombiner can prevent folding with fmuls
8705     // inserted during lowering.
8706     if (N0.getOpcode() == ISD::FADD &&
8707         (N0.getOperand(0) == N0.getOperand(1)) &&
8708         N0.hasOneUse()) {
8709       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
8710       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
8711       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
8712     }
8713   }
8714 
8715   // fold (fmul X, 2.0) -> (fadd X, X)
8716   if (N1CFP && N1CFP->isExactlyValue(+2.0))
8717     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
8718 
8719   // fold (fmul X, -1.0) -> (fneg X)
8720   if (N1CFP && N1CFP->isExactlyValue(-1.0))
8721     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
8722       return DAG.getNode(ISD::FNEG, DL, VT, N0);
8723 
8724   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
8725   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
8726     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
8727       // Both can be negated for free, check to see if at least one is cheaper
8728       // negated.
8729       if (LHSNeg == 2 || RHSNeg == 2)
8730         return DAG.getNode(ISD::FMUL, DL, VT,
8731                            GetNegatedExpression(N0, DAG, LegalOperations),
8732                            GetNegatedExpression(N1, DAG, LegalOperations),
8733                            Flags);
8734     }
8735   }
8736 
8737   // FMUL -> FMA combines:
8738   if (SDValue Fused = visitFMULForFMACombine(N)) {
8739     AddToWorklist(Fused.getNode());
8740     return Fused;
8741   }
8742 
8743   return SDValue();
8744 }
8745 
8746 SDValue DAGCombiner::visitFMA(SDNode *N) {
8747   SDValue N0 = N->getOperand(0);
8748   SDValue N1 = N->getOperand(1);
8749   SDValue N2 = N->getOperand(2);
8750   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8751   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8752   EVT VT = N->getValueType(0);
8753   SDLoc DL(N);
8754   const TargetOptions &Options = DAG.getTarget().Options;
8755 
8756   // Constant fold FMA.
8757   if (isa<ConstantFPSDNode>(N0) &&
8758       isa<ConstantFPSDNode>(N1) &&
8759       isa<ConstantFPSDNode>(N2)) {
8760     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
8761   }
8762 
8763   if (Options.UnsafeFPMath) {
8764     if (N0CFP && N0CFP->isZero())
8765       return N2;
8766     if (N1CFP && N1CFP->isZero())
8767       return N2;
8768   }
8769   // TODO: The FMA node should have flags that propagate to these nodes.
8770   if (N0CFP && N0CFP->isExactlyValue(1.0))
8771     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
8772   if (N1CFP && N1CFP->isExactlyValue(1.0))
8773     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
8774 
8775   // Canonicalize (fma c, x, y) -> (fma x, c, y)
8776   if (isConstantFPBuildVectorOrConstantFP(N0) &&
8777      !isConstantFPBuildVectorOrConstantFP(N1))
8778     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
8779 
8780   // TODO: FMA nodes should have flags that propagate to the created nodes.
8781   // For now, create a Flags object for use with all unsafe math transforms.
8782   SDNodeFlags Flags;
8783   Flags.setUnsafeAlgebra(true);
8784 
8785   if (Options.UnsafeFPMath) {
8786     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
8787     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
8788         isConstantFPBuildVectorOrConstantFP(N1) &&
8789         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
8790       return DAG.getNode(ISD::FMUL, DL, VT, N0,
8791                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
8792                                      &Flags), &Flags);
8793     }
8794 
8795     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
8796     if (N0.getOpcode() == ISD::FMUL &&
8797         isConstantFPBuildVectorOrConstantFP(N1) &&
8798         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
8799       return DAG.getNode(ISD::FMA, DL, VT,
8800                          N0.getOperand(0),
8801                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
8802                                      &Flags),
8803                          N2);
8804     }
8805   }
8806 
8807   // (fma x, 1, y) -> (fadd x, y)
8808   // (fma x, -1, y) -> (fadd (fneg x), y)
8809   if (N1CFP) {
8810     if (N1CFP->isExactlyValue(1.0))
8811       // TODO: The FMA node should have flags that propagate to this node.
8812       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
8813 
8814     if (N1CFP->isExactlyValue(-1.0) &&
8815         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
8816       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
8817       AddToWorklist(RHSNeg.getNode());
8818       // TODO: The FMA node should have flags that propagate to this node.
8819       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
8820     }
8821   }
8822 
8823   if (Options.UnsafeFPMath) {
8824     // (fma x, c, x) -> (fmul x, (c+1))
8825     if (N1CFP && N0 == N2) {
8826       return DAG.getNode(ISD::FMUL, DL, VT, N0,
8827                          DAG.getNode(ISD::FADD, DL, VT, N1,
8828                                      DAG.getConstantFP(1.0, DL, VT), &Flags),
8829                          &Flags);
8830     }
8831 
8832     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
8833     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
8834       return DAG.getNode(ISD::FMUL, DL, VT, N0,
8835                          DAG.getNode(ISD::FADD, DL, VT, N1,
8836                                      DAG.getConstantFP(-1.0, DL, VT), &Flags),
8837                          &Flags);
8838     }
8839   }
8840 
8841   return SDValue();
8842 }
8843 
8844 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
8845 // reciprocal.
8846 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
8847 // Notice that this is not always beneficial. One reason is different target
8848 // may have different costs for FDIV and FMUL, so sometimes the cost of two
8849 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
8850 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
8851 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
8852   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
8853   const SDNodeFlags *Flags = N->getFlags();
8854   if (!UnsafeMath && !Flags->hasAllowReciprocal())
8855     return SDValue();
8856 
8857   // Skip if current node is a reciprocal.
8858   SDValue N0 = N->getOperand(0);
8859   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8860   if (N0CFP && N0CFP->isExactlyValue(1.0))
8861     return SDValue();
8862 
8863   // Exit early if the target does not want this transform or if there can't
8864   // possibly be enough uses of the divisor to make the transform worthwhile.
8865   SDValue N1 = N->getOperand(1);
8866   unsigned MinUses = TLI.combineRepeatedFPDivisors();
8867   if (!MinUses || N1->use_size() < MinUses)
8868     return SDValue();
8869 
8870   // Find all FDIV users of the same divisor.
8871   // Use a set because duplicates may be present in the user list.
8872   SetVector<SDNode *> Users;
8873   for (auto *U : N1->uses()) {
8874     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
8875       // This division is eligible for optimization only if global unsafe math
8876       // is enabled or if this division allows reciprocal formation.
8877       if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
8878         Users.insert(U);
8879     }
8880   }
8881 
8882   // Now that we have the actual number of divisor uses, make sure it meets
8883   // the minimum threshold specified by the target.
8884   if (Users.size() < MinUses)
8885     return SDValue();
8886 
8887   EVT VT = N->getValueType(0);
8888   SDLoc DL(N);
8889   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
8890   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
8891 
8892   // Dividend / Divisor -> Dividend * Reciprocal
8893   for (auto *U : Users) {
8894     SDValue Dividend = U->getOperand(0);
8895     if (Dividend != FPOne) {
8896       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
8897                                     Reciprocal, Flags);
8898       CombineTo(U, NewNode);
8899     } else if (U != Reciprocal.getNode()) {
8900       // In the absence of fast-math-flags, this user node is always the
8901       // same node as Reciprocal, but with FMF they may be different nodes.
8902       CombineTo(U, Reciprocal);
8903     }
8904   }
8905   return SDValue(N, 0);  // N was replaced.
8906 }
8907 
8908 SDValue DAGCombiner::visitFDIV(SDNode *N) {
8909   SDValue N0 = N->getOperand(0);
8910   SDValue N1 = N->getOperand(1);
8911   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8912   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8913   EVT VT = N->getValueType(0);
8914   SDLoc DL(N);
8915   const TargetOptions &Options = DAG.getTarget().Options;
8916   SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8917 
8918   // fold vector ops
8919   if (VT.isVector())
8920     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8921       return FoldedVOp;
8922 
8923   // fold (fdiv c1, c2) -> c1/c2
8924   if (N0CFP && N1CFP)
8925     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
8926 
8927   if (Options.UnsafeFPMath) {
8928     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
8929     if (N1CFP) {
8930       // Compute the reciprocal 1.0 / c2.
8931       const APFloat &N1APF = N1CFP->getValueAPF();
8932       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
8933       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
8934       // Only do the transform if the reciprocal is a legal fp immediate that
8935       // isn't too nasty (eg NaN, denormal, ...).
8936       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
8937           (!LegalOperations ||
8938            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
8939            // backend)... we should handle this gracefully after Legalize.
8940            // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
8941            TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
8942            TLI.isFPImmLegal(Recip, VT)))
8943         return DAG.getNode(ISD::FMUL, DL, VT, N0,
8944                            DAG.getConstantFP(Recip, DL, VT), Flags);
8945     }
8946 
8947     // If this FDIV is part of a reciprocal square root, it may be folded
8948     // into a target-specific square root estimate instruction.
8949     if (N1.getOpcode() == ISD::FSQRT) {
8950       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
8951         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
8952       }
8953     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
8954                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
8955       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
8956                                           Flags)) {
8957         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
8958         AddToWorklist(RV.getNode());
8959         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
8960       }
8961     } else if (N1.getOpcode() == ISD::FP_ROUND &&
8962                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
8963       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
8964                                           Flags)) {
8965         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
8966         AddToWorklist(RV.getNode());
8967         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
8968       }
8969     } else if (N1.getOpcode() == ISD::FMUL) {
8970       // Look through an FMUL. Even though this won't remove the FDIV directly,
8971       // it's still worthwhile to get rid of the FSQRT if possible.
8972       SDValue SqrtOp;
8973       SDValue OtherOp;
8974       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
8975         SqrtOp = N1.getOperand(0);
8976         OtherOp = N1.getOperand(1);
8977       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
8978         SqrtOp = N1.getOperand(1);
8979         OtherOp = N1.getOperand(0);
8980       }
8981       if (SqrtOp.getNode()) {
8982         // We found a FSQRT, so try to make this fold:
8983         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
8984         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
8985           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
8986           AddToWorklist(RV.getNode());
8987           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
8988         }
8989       }
8990     }
8991 
8992     // Fold into a reciprocal estimate and multiply instead of a real divide.
8993     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
8994       AddToWorklist(RV.getNode());
8995       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
8996     }
8997   }
8998 
8999   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
9000   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
9001     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
9002       // Both can be negated for free, check to see if at least one is cheaper
9003       // negated.
9004       if (LHSNeg == 2 || RHSNeg == 2)
9005         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
9006                            GetNegatedExpression(N0, DAG, LegalOperations),
9007                            GetNegatedExpression(N1, DAG, LegalOperations),
9008                            Flags);
9009     }
9010   }
9011 
9012   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
9013     return CombineRepeatedDivisors;
9014 
9015   return SDValue();
9016 }
9017 
9018 SDValue DAGCombiner::visitFREM(SDNode *N) {
9019   SDValue N0 = N->getOperand(0);
9020   SDValue N1 = N->getOperand(1);
9021   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9022   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9023   EVT VT = N->getValueType(0);
9024 
9025   // fold (frem c1, c2) -> fmod(c1,c2)
9026   if (N0CFP && N1CFP)
9027     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
9028                        &cast<BinaryWithFlagsSDNode>(N)->Flags);
9029 
9030   return SDValue();
9031 }
9032 
9033 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
9034   if (!DAG.getTarget().Options.UnsafeFPMath)
9035     return SDValue();
9036 
9037   SDValue N0 = N->getOperand(0);
9038   if (TLI.isFsqrtCheap(N0, DAG))
9039     return SDValue();
9040 
9041   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
9042   // For now, create a Flags object for use with all unsafe math transforms.
9043   SDNodeFlags Flags;
9044   Flags.setUnsafeAlgebra(true);
9045   return buildSqrtEstimate(N0, &Flags);
9046 }
9047 
9048 /// copysign(x, fp_extend(y)) -> copysign(x, y)
9049 /// copysign(x, fp_round(y)) -> copysign(x, y)
9050 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
9051   SDValue N1 = N->getOperand(1);
9052   if ((N1.getOpcode() == ISD::FP_EXTEND ||
9053        N1.getOpcode() == ISD::FP_ROUND)) {
9054     // Do not optimize out type conversion of f128 type yet.
9055     // For some targets like x86_64, configuration is changed to keep one f128
9056     // value in one SSE register, but instruction selection cannot handle
9057     // FCOPYSIGN on SSE registers yet.
9058     EVT N1VT = N1->getValueType(0);
9059     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
9060     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
9061   }
9062   return false;
9063 }
9064 
9065 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
9066   SDValue N0 = N->getOperand(0);
9067   SDValue N1 = N->getOperand(1);
9068   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9069   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9070   EVT VT = N->getValueType(0);
9071 
9072   if (N0CFP && N1CFP) // Constant fold
9073     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
9074 
9075   if (N1CFP) {
9076     const APFloat &V = N1CFP->getValueAPF();
9077     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
9078     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
9079     if (!V.isNegative()) {
9080       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
9081         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9082     } else {
9083       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9084         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
9085                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
9086     }
9087   }
9088 
9089   // copysign(fabs(x), y) -> copysign(x, y)
9090   // copysign(fneg(x), y) -> copysign(x, y)
9091   // copysign(copysign(x,z), y) -> copysign(x, y)
9092   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
9093       N0.getOpcode() == ISD::FCOPYSIGN)
9094     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
9095 
9096   // copysign(x, abs(y)) -> abs(x)
9097   if (N1.getOpcode() == ISD::FABS)
9098     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9099 
9100   // copysign(x, copysign(y,z)) -> copysign(x, z)
9101   if (N1.getOpcode() == ISD::FCOPYSIGN)
9102     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
9103 
9104   // copysign(x, fp_extend(y)) -> copysign(x, y)
9105   // copysign(x, fp_round(y)) -> copysign(x, y)
9106   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
9107     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
9108 
9109   return SDValue();
9110 }
9111 
9112 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
9113   SDValue N0 = N->getOperand(0);
9114   EVT VT = N->getValueType(0);
9115   EVT OpVT = N0.getValueType();
9116 
9117   // fold (sint_to_fp c1) -> c1fp
9118   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9119       // ...but only if the target supports immediate floating-point values
9120       (!LegalOperations ||
9121        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
9122     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
9123 
9124   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
9125   // but UINT_TO_FP is legal on this target, try to convert.
9126   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
9127       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
9128     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
9129     if (DAG.SignBitIsZero(N0))
9130       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
9131   }
9132 
9133   // The next optimizations are desirable only if SELECT_CC can be lowered.
9134   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
9135     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
9136     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
9137         !VT.isVector() &&
9138         (!LegalOperations ||
9139          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9140       SDLoc DL(N);
9141       SDValue Ops[] =
9142         { N0.getOperand(0), N0.getOperand(1),
9143           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
9144           N0.getOperand(2) };
9145       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
9146     }
9147 
9148     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
9149     //      (select_cc x, y, 1.0, 0.0,, cc)
9150     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
9151         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
9152         (!LegalOperations ||
9153          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9154       SDLoc DL(N);
9155       SDValue Ops[] =
9156         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
9157           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
9158           N0.getOperand(0).getOperand(2) };
9159       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
9160     }
9161   }
9162 
9163   return SDValue();
9164 }
9165 
9166 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
9167   SDValue N0 = N->getOperand(0);
9168   EVT VT = N->getValueType(0);
9169   EVT OpVT = N0.getValueType();
9170 
9171   // fold (uint_to_fp c1) -> c1fp
9172   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9173       // ...but only if the target supports immediate floating-point values
9174       (!LegalOperations ||
9175        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
9176     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
9177 
9178   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
9179   // but SINT_TO_FP is legal on this target, try to convert.
9180   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
9181       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
9182     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
9183     if (DAG.SignBitIsZero(N0))
9184       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
9185   }
9186 
9187   // The next optimizations are desirable only if SELECT_CC can be lowered.
9188   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
9189     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
9190 
9191     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
9192         (!LegalOperations ||
9193          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9194       SDLoc DL(N);
9195       SDValue Ops[] =
9196         { N0.getOperand(0), N0.getOperand(1),
9197           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
9198           N0.getOperand(2) };
9199       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
9200     }
9201   }
9202 
9203   return SDValue();
9204 }
9205 
9206 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
9207 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
9208   SDValue N0 = N->getOperand(0);
9209   EVT VT = N->getValueType(0);
9210 
9211   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
9212     return SDValue();
9213 
9214   SDValue Src = N0.getOperand(0);
9215   EVT SrcVT = Src.getValueType();
9216   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
9217   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
9218 
9219   // We can safely assume the conversion won't overflow the output range,
9220   // because (for example) (uint8_t)18293.f is undefined behavior.
9221 
9222   // Since we can assume the conversion won't overflow, our decision as to
9223   // whether the input will fit in the float should depend on the minimum
9224   // of the input range and output range.
9225 
9226   // This means this is also safe for a signed input and unsigned output, since
9227   // a negative input would lead to undefined behavior.
9228   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
9229   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
9230   unsigned ActualSize = std::min(InputSize, OutputSize);
9231   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
9232 
9233   // We can only fold away the float conversion if the input range can be
9234   // represented exactly in the float range.
9235   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
9236     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
9237       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
9238                                                        : ISD::ZERO_EXTEND;
9239       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
9240     }
9241     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
9242       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
9243     return DAG.getBitcast(VT, Src);
9244   }
9245   return SDValue();
9246 }
9247 
9248 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
9249   SDValue N0 = N->getOperand(0);
9250   EVT VT = N->getValueType(0);
9251 
9252   // fold (fp_to_sint c1fp) -> c1
9253   if (isConstantFPBuildVectorOrConstantFP(N0))
9254     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
9255 
9256   return FoldIntToFPToInt(N, DAG);
9257 }
9258 
9259 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
9260   SDValue N0 = N->getOperand(0);
9261   EVT VT = N->getValueType(0);
9262 
9263   // fold (fp_to_uint c1fp) -> c1
9264   if (isConstantFPBuildVectorOrConstantFP(N0))
9265     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
9266 
9267   return FoldIntToFPToInt(N, DAG);
9268 }
9269 
9270 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
9271   SDValue N0 = N->getOperand(0);
9272   SDValue N1 = N->getOperand(1);
9273   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9274   EVT VT = N->getValueType(0);
9275 
9276   // fold (fp_round c1fp) -> c1fp
9277   if (N0CFP)
9278     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
9279 
9280   // fold (fp_round (fp_extend x)) -> x
9281   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
9282     return N0.getOperand(0);
9283 
9284   // fold (fp_round (fp_round x)) -> (fp_round x)
9285   if (N0.getOpcode() == ISD::FP_ROUND) {
9286     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
9287     const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
9288 
9289     // Skip this folding if it results in an fp_round from f80 to f16.
9290     //
9291     // f80 to f16 always generates an expensive (and as yet, unimplemented)
9292     // libcall to __truncxfhf2 instead of selecting native f16 conversion
9293     // instructions from f32 or f64.  Moreover, the first (value-preserving)
9294     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
9295     // x86.
9296     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
9297       return SDValue();
9298 
9299     // If the first fp_round isn't a value preserving truncation, it might
9300     // introduce a tie in the second fp_round, that wouldn't occur in the
9301     // single-step fp_round we want to fold to.
9302     // In other words, double rounding isn't the same as rounding.
9303     // Also, this is a value preserving truncation iff both fp_round's are.
9304     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
9305       SDLoc DL(N);
9306       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
9307                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
9308     }
9309   }
9310 
9311   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
9312   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
9313     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
9314                               N0.getOperand(0), N1);
9315     AddToWorklist(Tmp.getNode());
9316     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
9317                        Tmp, N0.getOperand(1));
9318   }
9319 
9320   return SDValue();
9321 }
9322 
9323 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
9324   SDValue N0 = N->getOperand(0);
9325   EVT VT = N->getValueType(0);
9326   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9327   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9328 
9329   // fold (fp_round_inreg c1fp) -> c1fp
9330   if (N0CFP && isTypeLegal(EVT)) {
9331     SDLoc DL(N);
9332     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
9333     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
9334   }
9335 
9336   return SDValue();
9337 }
9338 
9339 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
9340   SDValue N0 = N->getOperand(0);
9341   EVT VT = N->getValueType(0);
9342 
9343   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
9344   if (N->hasOneUse() &&
9345       N->use_begin()->getOpcode() == ISD::FP_ROUND)
9346     return SDValue();
9347 
9348   // fold (fp_extend c1fp) -> c1fp
9349   if (isConstantFPBuildVectorOrConstantFP(N0))
9350     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
9351 
9352   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
9353   if (N0.getOpcode() == ISD::FP16_TO_FP &&
9354       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
9355     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
9356 
9357   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
9358   // value of X.
9359   if (N0.getOpcode() == ISD::FP_ROUND
9360       && N0.getNode()->getConstantOperandVal(1) == 1) {
9361     SDValue In = N0.getOperand(0);
9362     if (In.getValueType() == VT) return In;
9363     if (VT.bitsLT(In.getValueType()))
9364       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
9365                          In, N0.getOperand(1));
9366     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
9367   }
9368 
9369   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
9370   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
9371        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9372     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9373     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9374                                      LN0->getChain(),
9375                                      LN0->getBasePtr(), N0.getValueType(),
9376                                      LN0->getMemOperand());
9377     CombineTo(N, ExtLoad);
9378     CombineTo(N0.getNode(),
9379               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
9380                           N0.getValueType(), ExtLoad,
9381                           DAG.getIntPtrConstant(1, SDLoc(N0))),
9382               ExtLoad.getValue(1));
9383     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9384   }
9385 
9386   return SDValue();
9387 }
9388 
9389 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
9390   SDValue N0 = N->getOperand(0);
9391   EVT VT = N->getValueType(0);
9392 
9393   // fold (fceil c1) -> fceil(c1)
9394   if (isConstantFPBuildVectorOrConstantFP(N0))
9395     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
9396 
9397   return SDValue();
9398 }
9399 
9400 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
9401   SDValue N0 = N->getOperand(0);
9402   EVT VT = N->getValueType(0);
9403 
9404   // fold (ftrunc c1) -> ftrunc(c1)
9405   if (isConstantFPBuildVectorOrConstantFP(N0))
9406     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
9407 
9408   return SDValue();
9409 }
9410 
9411 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
9412   SDValue N0 = N->getOperand(0);
9413   EVT VT = N->getValueType(0);
9414 
9415   // fold (ffloor c1) -> ffloor(c1)
9416   if (isConstantFPBuildVectorOrConstantFP(N0))
9417     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
9418 
9419   return SDValue();
9420 }
9421 
9422 // FIXME: FNEG and FABS have a lot in common; refactor.
9423 SDValue DAGCombiner::visitFNEG(SDNode *N) {
9424   SDValue N0 = N->getOperand(0);
9425   EVT VT = N->getValueType(0);
9426 
9427   // Constant fold FNEG.
9428   if (isConstantFPBuildVectorOrConstantFP(N0))
9429     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
9430 
9431   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
9432                          &DAG.getTarget().Options))
9433     return GetNegatedExpression(N0, DAG, LegalOperations);
9434 
9435   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
9436   // constant pool values.
9437   if (!TLI.isFNegFree(VT) &&
9438       N0.getOpcode() == ISD::BITCAST &&
9439       N0.getNode()->hasOneUse()) {
9440     SDValue Int = N0.getOperand(0);
9441     EVT IntVT = Int.getValueType();
9442     if (IntVT.isInteger() && !IntVT.isVector()) {
9443       APInt SignMask;
9444       if (N0.getValueType().isVector()) {
9445         // For a vector, get a mask such as 0x80... per scalar element
9446         // and splat it.
9447         SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits());
9448         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
9449       } else {
9450         // For a scalar, just generate 0x80...
9451         SignMask = APInt::getSignBit(IntVT.getSizeInBits());
9452       }
9453       SDLoc DL0(N0);
9454       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
9455                         DAG.getConstant(SignMask, DL0, IntVT));
9456       AddToWorklist(Int.getNode());
9457       return DAG.getBitcast(VT, Int);
9458     }
9459   }
9460 
9461   // (fneg (fmul c, x)) -> (fmul -c, x)
9462   if (N0.getOpcode() == ISD::FMUL &&
9463       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
9464     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
9465     if (CFP1) {
9466       APFloat CVal = CFP1->getValueAPF();
9467       CVal.changeSign();
9468       if (Level >= AfterLegalizeDAG &&
9469           (TLI.isFPImmLegal(CVal, VT) ||
9470            TLI.isOperationLegal(ISD::ConstantFP, VT)))
9471         return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
9472                            DAG.getNode(ISD::FNEG, SDLoc(N), VT,
9473                                        N0.getOperand(1)),
9474                            &cast<BinaryWithFlagsSDNode>(N0)->Flags);
9475     }
9476   }
9477 
9478   return SDValue();
9479 }
9480 
9481 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
9482   SDValue N0 = N->getOperand(0);
9483   SDValue N1 = N->getOperand(1);
9484   EVT VT = N->getValueType(0);
9485   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9486   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9487 
9488   if (N0CFP && N1CFP) {
9489     const APFloat &C0 = N0CFP->getValueAPF();
9490     const APFloat &C1 = N1CFP->getValueAPF();
9491     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
9492   }
9493 
9494   // Canonicalize to constant on RHS.
9495   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9496      !isConstantFPBuildVectorOrConstantFP(N1))
9497     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
9498 
9499   return SDValue();
9500 }
9501 
9502 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
9503   SDValue N0 = N->getOperand(0);
9504   SDValue N1 = N->getOperand(1);
9505   EVT VT = N->getValueType(0);
9506   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9507   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9508 
9509   if (N0CFP && N1CFP) {
9510     const APFloat &C0 = N0CFP->getValueAPF();
9511     const APFloat &C1 = N1CFP->getValueAPF();
9512     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
9513   }
9514 
9515   // Canonicalize to constant on RHS.
9516   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9517      !isConstantFPBuildVectorOrConstantFP(N1))
9518     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
9519 
9520   return SDValue();
9521 }
9522 
9523 SDValue DAGCombiner::visitFABS(SDNode *N) {
9524   SDValue N0 = N->getOperand(0);
9525   EVT VT = N->getValueType(0);
9526 
9527   // fold (fabs c1) -> fabs(c1)
9528   if (isConstantFPBuildVectorOrConstantFP(N0))
9529     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9530 
9531   // fold (fabs (fabs x)) -> (fabs x)
9532   if (N0.getOpcode() == ISD::FABS)
9533     return N->getOperand(0);
9534 
9535   // fold (fabs (fneg x)) -> (fabs x)
9536   // fold (fabs (fcopysign x, y)) -> (fabs x)
9537   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
9538     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
9539 
9540   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
9541   // constant pool values.
9542   if (!TLI.isFAbsFree(VT) &&
9543       N0.getOpcode() == ISD::BITCAST &&
9544       N0.getNode()->hasOneUse()) {
9545     SDValue Int = N0.getOperand(0);
9546     EVT IntVT = Int.getValueType();
9547     if (IntVT.isInteger() && !IntVT.isVector()) {
9548       APInt SignMask;
9549       if (N0.getValueType().isVector()) {
9550         // For a vector, get a mask such as 0x7f... per scalar element
9551         // and splat it.
9552         SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits());
9553         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
9554       } else {
9555         // For a scalar, just generate 0x7f...
9556         SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
9557       }
9558       SDLoc DL(N0);
9559       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
9560                         DAG.getConstant(SignMask, DL, IntVT));
9561       AddToWorklist(Int.getNode());
9562       return DAG.getBitcast(N->getValueType(0), Int);
9563     }
9564   }
9565 
9566   return SDValue();
9567 }
9568 
/// Combine a BRCOND node. Operand 0 is the chain, operand 1 the condition
/// value and operand 2 is forwarded unchanged as the last operand of any
/// replacement branch. Returns a replacement node, SDValue(N, 0) when N was
/// already updated in place, or an empty SDValue when nothing was changed.
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  // The condition is a single-use SRL, or a single-use TRUNCATE of a
  // single-use SRL.
  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
       (N1.getOperand(0).hasOneUse() &&
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
    SDNode *Trunc = nullptr;
    if (N1.getOpcode() == ISD::TRUNCATE) {
      // Look past the truncate.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc ne %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDLoc DL(N);
          // Compare the AND result against zero instead of shifting it.
          SDValue SetCC =
            DAG.getSetCC(DL,
                         getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc)
            deleteAndRecombine(Trunc);
          // Replace the uses of SRL with SETCC
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
          deleteAndRecombine(N1.getNode());
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }

    if (Trunc)
      // Restore N1 if the above transformation doesn't match.
      N1 = N->getOperand(1);
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      if (SDValue Tmp = visitXOR(TheXor)) {
        if (Tmp.getNode() != TheXor) {
          DEBUG(dbgs() << "\nReplacing.8 ";
                TheXor->dump(&DAG);
                dbgs() << "\nWith: ";
                Tmp.getNode()->dump(&DAG);
                dbgs() << '\n');
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
          deleteAndRecombine(TheXor);
          // Branch on the simplified value instead of the old xor.
          return DAG.getNode(ISD::BRCOND, SDLoc(N),
                             MVT::Other, Chain, Tmp, N2);
        }

        // visitXOR has changed XOR's operands or replaced the XOR completely,
        // bail out.
        return SDValue(N, 0);
      }
    }

    // Neither xor operand is a SETCC: rewrite the xor as a comparison of its
    // two operands.
    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // NOTE(review): this condition needs Op0 to be both the constant one and
      // an XOR node, which looks unsatisfiable, so Equal presumably never
      // becomes true here. Confirm whether Op1 was intended; a fix would also
      // have to compare the inner xor's operands rather than Op0/Op1.
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
          Op0.getOpcode() == ISD::XOR) {
        TheXor = Op0.getNode();
        Equal = true;
      }

      // Keep the xor's own type for the SETCC result unless types have been
      // legalized, in which case ask for the SETCC result type.
      EVT SetCCVT = N1.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
      deleteAndRecombine(N1.getNode());
      return DAG.getNode(ISD::BRCOND, SDLoc(N),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}
9713 
9714 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
9715 //
9716 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
9717   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
9718   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
9719 
9720   // If N is a constant we could fold this into a fallthrough or unconditional
9721   // branch. However that doesn't happen very often in normal code, because
9722   // Instcombine/SimplifyCFG should have handled the available opportunities.
9723   // If we did this folding here, it would be necessary to update the
9724   // MachineBasicBlock CFG, which is awkward.
9725 
9726   // Use SimplifySetCC to simplify SETCC's.
9727   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
9728                                CondLHS, CondRHS, CC->get(), SDLoc(N),
9729                                false);
9730   if (Simp.getNode()) AddToWorklist(Simp.getNode());
9731 
9732   // fold to a simpler setcc
9733   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
9734     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
9735                        N->getOperand(0), Simp.getOperand(2),
9736                        Simp.getOperand(0), Simp.getOperand(1),
9737                        N->getOperand(4));
9738 
9739   return SDValue();
9740 }
9741 
9742 /// Return true if 'Use' is a load or a store that uses N as its base pointer
9743 /// and that N may be folded in the load / store addressing mode.
9744 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
9745                                     SelectionDAG &DAG,
9746                                     const TargetLowering &TLI) {
9747   EVT VT;
9748   unsigned AS;
9749 
9750   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
9751     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
9752       return false;
9753     VT = LD->getMemoryVT();
9754     AS = LD->getAddressSpace();
9755   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
9756     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
9757       return false;
9758     VT = ST->getMemoryVT();
9759     AS = ST->getAddressSpace();
9760   } else
9761     return false;
9762 
9763   TargetLowering::AddrMode AM;
9764   if (N->getOpcode() == ISD::ADD) {
9765     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
9766     if (Offset)
9767       // [reg +/- imm]
9768       AM.BaseOffs = Offset->getSExtValue();
9769     else
9770       // [reg +/- reg]
9771       AM.Scale = 1;
9772   } else if (N->getOpcode() == ISD::SUB) {
9773     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
9774     if (Offset)
9775       // [reg +/- imm]
9776       AM.BaseOffs = -Offset->getSExtValue();
9777     else
9778       // [reg +/- reg]
9779       AM.Scale = 1;
9780   } else
9781     return false;
9782 
9783   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
9784                                    VT.getTypeForEVT(*DAG.getContext()), AS);
9785 }
9786 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
///
/// \param N the load or store node to transform; any other node kind is
///          rejected.
/// \returns true if N was replaced by an indexed load/store (N and the old
///          pointer node are then deleted), false if the DAG is unchanged.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Only attempted once the combiner level is at least AfterLegalizeDAG.
  if (Level < AfterLegalizeDAG)
    return false;

  // Pick up the base pointer of an unindexed load/store, bailing out unless
  // the target supports a pre-inc or pre-dec form for the memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // While Swapped is in effect, BasePtr names the non-constant part and Offset
  // the constant part; the pair is swapped back before the indexed node is
  // built.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  // Register nodes are rejected as base pointers here as well.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  // They are shared by the OtherUses scan and the check #3 loop below, with N
  // as the starting worklist entry.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Users for which the predecessor check succeeds are skipped rather
      // than rewritten.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Any remaining user that is not an add/sub abandons the rewrite of
      // other uses altogether.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // The user's other operand (index 1 - operand number) must be a
      // constant as well.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Restore the order the target returned before building the indexed node.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  // Every other use could fold Ptr into its own addressing mode (#4).
  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Redirect N's results: for a load, results 0 and 1 of N map to results 0
  // and 2 of the indexed node; for a store, result 0 of N maps to result 1.
  // The remaining result (1 for loads, 0 for stores) is used below as the
  // updated base value.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Swap again so that Offset is the constant part for the rewrite below.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    // Work out which operand of the user is the constant offset and which is
    // BasePtr.
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    // A sign is -1 when the corresponding term is the subtracted operand of a
    // SUB (for the user) or of a PRE_DEC (for the indexed node).
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    // CNV = x0 * offset0 - x1 * y0 * y1 * offset1
    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}
10014 
10015 /// Try to combine a load/store with a add/sub of the base pointer node into a
10016 /// post-indexed load/store. The transformation folded the add/subtract into the
10017 /// new indexed load/store effectively and all of its uses are redirected to the
10018 /// new load/store.
10019 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
10020   if (Level < AfterLegalizeDAG)
10021     return false;
10022 
10023   bool isLoad = true;
10024   SDValue Ptr;
10025   EVT VT;
10026   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
10027     if (LD->isIndexed())
10028       return false;
10029     VT = LD->getMemoryVT();
10030     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
10031         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
10032       return false;
10033     Ptr = LD->getBasePtr();
10034   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
10035     if (ST->isIndexed())
10036       return false;
10037     VT = ST->getMemoryVT();
10038     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
10039         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
10040       return false;
10041     Ptr = ST->getBasePtr();
10042     isLoad = false;
10043   } else {
10044     return false;
10045   }
10046 
10047   if (Ptr.getNode()->hasOneUse())
10048     return false;
10049 
10050   for (SDNode *Op : Ptr.getNode()->uses()) {
10051     if (Op == N ||
10052         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
10053       continue;
10054 
10055     SDValue BasePtr;
10056     SDValue Offset;
10057     ISD::MemIndexedMode AM = ISD::UNINDEXED;
10058     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
10059       // Don't create a indexed load / store with zero offset.
10060       if (isNullConstant(Offset))
10061         continue;
10062 
10063       // Try turning it into a post-indexed load / store except when
10064       // 1) All uses are load / store ops that use it as base ptr (and
10065       //    it may be folded as addressing mmode).
10066       // 2) Op must be independent of N, i.e. Op is neither a predecessor
10067       //    nor a successor of N. Otherwise, if Op is folded that would
10068       //    create a cycle.
10069 
10070       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
10071         continue;
10072 
10073       // Check for #1.
10074       bool TryNext = false;
10075       for (SDNode *Use : BasePtr.getNode()->uses()) {
10076         if (Use == Ptr.getNode())
10077           continue;
10078 
10079         // If all the uses are load / store addresses, then don't do the
10080         // transformation.
10081         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
10082           bool RealUse = false;
10083           for (SDNode *UseUse : Use->uses()) {
10084             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
10085               RealUse = true;
10086           }
10087 
10088           if (!RealUse) {
10089             TryNext = true;
10090             break;
10091           }
10092         }
10093       }
10094 
10095       if (TryNext)
10096         continue;
10097 
10098       // Check for #2
10099       if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
10100         SDValue Result = isLoad
10101           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
10102                                BasePtr, Offset, AM)
10103           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
10104                                 BasePtr, Offset, AM);
10105         ++PostIndexedNodes;
10106         ++NodesCombined;
10107         DEBUG(dbgs() << "\nReplacing.5 ";
10108               N->dump(&DAG);
10109               dbgs() << "\nWith: ";
10110               Result.getNode()->dump(&DAG);
10111               dbgs() << '\n');
10112         WorklistRemover DeadNodes(*this);
10113         if (isLoad) {
10114           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
10115           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
10116         } else {
10117           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
10118         }
10119 
10120         // Finally, since the node is now dead, remove it from the graph.
10121         deleteAndRecombine(N);
10122 
10123         // Replace the uses of Use with uses of the updated base value.
10124         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
10125                                       Result.getValue(isLoad ? 1 : 0));
10126         deleteAndRecombine(Op);
10127         return true;
10128       }
10129     }
10130   }
10131 
10132   return false;
10133 }
10134 
10135 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
10136 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
10137   ISD::MemIndexedMode AM = LD->getAddressingMode();
10138   assert(AM != ISD::UNINDEXED);
10139   SDValue BP = LD->getOperand(1);
10140   SDValue Inc = LD->getOperand(2);
10141 
10142   // Some backends use TargetConstants for load offsets, but don't expect
10143   // TargetConstants in general ADD nodes. We can convert these constants into
10144   // regular Constants (if the constant is not opaque).
10145   assert((Inc.getOpcode() != ISD::TargetConstant ||
10146           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
10147          "Cannot split out indexing using opaque target constants");
10148   if (Inc.getOpcode() == ISD::TargetConstant) {
10149     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
10150     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
10151                           ConstInc->getValueType(0));
10152   }
10153 
10154   unsigned Opc =
10155       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
10156   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
10157 }
10158 
/// \brief Combine the load node \p N.
///
/// The following transformations are attempted in order; the function returns
/// as soon as one of them applies:
///  - Remove a non-volatile load whose loaded value is unused (for indexed
///    loads the updated pointer must also be unused, or be splittable into a
///    separate add/sub).
///  - Forward the stored value when the load's chain is a non-truncating store
///    to the same pointer with the same value type.
///  - Recreate the load with a larger alignment when one can be inferred.
///  - When alias analysis is enabled, rechain the load on a better chain found
///    by FindBetterChain.
///  - Turn the load into a pre- or post-indexed load.
///  - Slice the load into smaller loads.
///
/// \return SDValue(N, 0) or the result of CombineTo when something changed, an
/// empty SDValue otherwise.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    // Result 1 is the chain for unindexed loads; indexed loads carry the
    // updated pointer in result 1 and the chain in result 2.
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        // Only delete the load if the replacement above really left it dead.
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The load can go away when its value is unused and the updated pointer
      // is either unused too or can be recomputed by a separate add/sub.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (OptLevel != CodeGenOpt::None &&
      ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      // The replacement value, Chain.getOperand(1), is operand 1 of the store
      // node the load is chained to.
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
      return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // Nothing to replace if the DAG handed back the very same node.
        if (NewLoad.getNode() != N)
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  // An explicit CombinerAA command-line setting takes precedence over the
  // subtarget's preference.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  // In builds with assertions, CombinerAAOnlyFunc restricts the use of alias
  // analysis to the function with the given name.
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  // No combine applied.
  return SDValue();
}
10305 
10306 namespace {
10307 /// \brief Helper structure used to slice a load in smaller loads.
10308 /// Basically a slice is obtained from the following sequence:
10309 /// Origin = load Ty1, Base
10310 /// Shift = srl Ty1 Origin, CstTy Amount
10311 /// Inst = trunc Shift to Ty2
10312 ///
10313 /// Then, it will be rewriten into:
10314 /// Slice = load SliceTy, Base + SliceOffset
10315 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
10316 ///
10317 /// SliceTy is deduced from the number of bits that are actually used to
10318 /// build Inst.
10319 struct LoadedSlice {
10320   /// \brief Helper structure used to compute the cost of a slice.
10321   struct Cost {
10322     /// Are we optimizing for code size.
10323     bool ForCodeSize;
10324     /// Various cost.
10325     unsigned Loads;
10326     unsigned Truncates;
10327     unsigned CrossRegisterBanksCopies;
10328     unsigned ZExts;
10329     unsigned Shift;
10330 
10331     Cost(bool ForCodeSize = false)
10332         : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
10333           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
10334 
10335     /// \brief Get the cost of one isolated slice.
10336     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
10337         : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
10338           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
10339       EVT TruncType = LS.Inst->getValueType(0);
10340       EVT LoadedType = LS.getLoadedType();
10341       if (TruncType != LoadedType &&
10342           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
10343         ZExts = 1;
10344     }
10345 
10346     /// \brief Account for slicing gain in the current cost.
10347     /// Slicing provide a few gains like removing a shift or a
10348     /// truncate. This method allows to grow the cost of the original
10349     /// load with the gain from this slice.
10350     void addSliceGain(const LoadedSlice &LS) {
10351       // Each slice saves a truncate.
10352       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
10353       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
10354                               LS.Inst->getValueType(0)))
10355         ++Truncates;
10356       // If there is a shift amount, this slice gets rid of it.
10357       if (LS.Shift)
10358         ++Shift;
10359       // If this slice can merge a cross register bank copy, account for it.
10360       if (LS.canMergeExpensiveCrossRegisterBankCopy())
10361         ++CrossRegisterBanksCopies;
10362     }
10363 
10364     Cost &operator+=(const Cost &RHS) {
10365       Loads += RHS.Loads;
10366       Truncates += RHS.Truncates;
10367       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
10368       ZExts += RHS.ZExts;
10369       Shift += RHS.Shift;
10370       return *this;
10371     }
10372 
10373     bool operator==(const Cost &RHS) const {
10374       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
10375              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
10376              ZExts == RHS.ZExts && Shift == RHS.Shift;
10377     }
10378 
10379     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
10380 
10381     bool operator<(const Cost &RHS) const {
10382       // Assume cross register banks copies are as expensive as loads.
10383       // FIXME: Do we want some more target hooks?
10384       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
10385       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
10386       // Unless we are optimizing for code size, consider the
10387       // expensive operation first.
10388       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
10389         return ExpensiveOpsLHS < ExpensiveOpsRHS;
10390       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
10391              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
10392     }
10393 
10394     bool operator>(const Cost &RHS) const { return RHS < *this; }
10395 
10396     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
10397 
10398     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
10399   };
10400   // The last instruction that represent the slice. This should be a
10401   // truncate instruction.
10402   SDNode *Inst;
10403   // The original load instruction.
10404   LoadSDNode *Origin;
10405   // The right shift amount in bits from the original load.
10406   unsigned Shift;
10407   // The DAG from which Origin came from.
10408   // This is used to get some contextual information about legal types, etc.
10409   SelectionDAG *DAG;
10410 
10411   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
10412               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
10413       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
10414 
10415   /// \brief Get the bits used in a chunk of bits \p BitWidth large.
10416   /// \return Result is \p BitWidth and has used bits set to 1 and
10417   ///         not used bits set to 0.
10418   APInt getUsedBits() const {
10419     // Reproduce the trunc(lshr) sequence:
10420     // - Start from the truncated value.
10421     // - Zero extend to the desired bit width.
10422     // - Shift left.
10423     assert(Origin && "No original load to compare against.");
10424     unsigned BitWidth = Origin->getValueSizeInBits(0);
10425     assert(Inst && "This slice is not bound to an instruction");
10426     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
10427            "Extracted slice is bigger than the whole type!");
10428     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
10429     UsedBits.setAllBits();
10430     UsedBits = UsedBits.zext(BitWidth);
10431     UsedBits <<= Shift;
10432     return UsedBits;
10433   }
10434 
10435   /// \brief Get the size of the slice to be loaded in bytes.
10436   unsigned getLoadedSize() const {
10437     unsigned SliceSize = getUsedBits().countPopulation();
10438     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
10439     return SliceSize / 8;
10440   }
10441 
10442   /// \brief Get the type that will be loaded for this slice.
10443   /// Note: This may not be the final type for the slice.
10444   EVT getLoadedType() const {
10445     assert(DAG && "Missing context");
10446     LLVMContext &Ctxt = *DAG->getContext();
10447     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
10448   }
10449 
10450   /// \brief Get the alignment of the load used for this slice.
10451   unsigned getAlignment() const {
10452     unsigned Alignment = Origin->getAlignment();
10453     unsigned Offset = getOffsetFromBase();
10454     if (Offset != 0)
10455       Alignment = MinAlign(Alignment, Alignment + Offset);
10456     return Alignment;
10457   }
10458 
10459   /// \brief Check if this slice can be rewritten with legal operations.
10460   bool isLegal() const {
10461     // An invalid slice is not legal.
10462     if (!Origin || !Inst || !DAG)
10463       return false;
10464 
10465     // Offsets are for indexed load only, we do not handle that.
10466     if (!Origin->getOffset().isUndef())
10467       return false;
10468 
10469     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
10470 
10471     // Check that the type is legal.
10472     EVT SliceType = getLoadedType();
10473     if (!TLI.isTypeLegal(SliceType))
10474       return false;
10475 
10476     // Check that the load is legal for this type.
10477     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
10478       return false;
10479 
10480     // Check that the offset can be computed.
10481     // 1. Check its type.
10482     EVT PtrType = Origin->getBasePtr().getValueType();
10483     if (PtrType == MVT::Untyped || PtrType.isExtended())
10484       return false;
10485 
10486     // 2. Check that it fits in the immediate.
10487     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
10488       return false;
10489 
10490     // 3. Check that the computation is legal.
10491     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
10492       return false;
10493 
10494     // Check that the zext is legal if it needs one.
10495     EVT TruncateType = Inst->getValueType(0);
10496     if (TruncateType != SliceType &&
10497         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
10498       return false;
10499 
10500     return true;
10501   }
10502 
10503   /// \brief Get the offset in bytes of this slice in the original chunk of
10504   /// bits.
10505   /// \pre DAG != nullptr.
10506   uint64_t getOffsetFromBase() const {
10507     assert(DAG && "Missing context.");
10508     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
10509     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
10510     uint64_t Offset = Shift / 8;
10511     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
10512     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
10513            "The size of the original loaded type is not a multiple of a"
10514            " byte.");
10515     // If Offset is bigger than TySizeInBytes, it means we are loading all
10516     // zeros. This should have been optimized before in the process.
10517     assert(TySizeInBytes > Offset &&
10518            "Invalid shift amount for given loaded size");
10519     if (IsBigEndian)
10520       Offset = TySizeInBytes - Offset - getLoadedSize();
10521     return Offset;
10522   }
10523 
10524   /// \brief Generate the sequence of instructions to load the slice
10525   /// represented by this object and redirect the uses of this slice to
10526   /// this new sequence of instructions.
10527   /// \pre this->Inst && this->Origin are valid Instructions and this
10528   /// object passed the legal check: LoadedSlice::isLegal returned true.
10529   /// \return The last instruction of the sequence used to load the slice.
10530   SDValue loadSlice() const {
10531     assert(Inst && Origin && "Unable to replace a non-existing slice.");
10532     const SDValue &OldBaseAddr = Origin->getBasePtr();
10533     SDValue BaseAddr = OldBaseAddr;
10534     // Get the offset in that chunk of bytes w.r.t. the endianess.
10535     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
10536     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
10537     if (Offset) {
10538       // BaseAddr = BaseAddr + Offset.
10539       EVT ArithType = BaseAddr.getValueType();
10540       SDLoc DL(Origin);
10541       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
10542                               DAG->getConstant(Offset, DL, ArithType));
10543     }
10544 
10545     // Create the type of the loaded slice according to its size.
10546     EVT SliceType = getLoadedType();
10547 
10548     // Create the load for the slice.
10549     SDValue LastInst =
10550         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
10551                      Origin->getPointerInfo().getWithOffset(Offset),
10552                      getAlignment(), Origin->getMemOperand()->getFlags());
10553     // If the final type is not the same as the loaded type, this means that
10554     // we have to pad with zero. Create a zero extend for that.
10555     EVT FinalType = Inst->getValueType(0);
10556     if (SliceType != FinalType)
10557       LastInst =
10558           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
10559     return LastInst;
10560   }
10561 
10562   /// \brief Check if this slice can be merged with an expensive cross register
10563   /// bank copy. E.g.,
10564   /// i = load i32
10565   /// f = bitcast i32 i to float
10566   bool canMergeExpensiveCrossRegisterBankCopy() const {
10567     if (!Inst || !Inst->hasOneUse())
10568       return false;
10569     SDNode *Use = *Inst->use_begin();
10570     if (Use->getOpcode() != ISD::BITCAST)
10571       return false;
10572     assert(DAG && "Missing context");
10573     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
10574     EVT ResVT = Use->getValueType(0);
10575     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
10576     const TargetRegisterClass *ArgRC =
10577         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
10578     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
10579       return false;
10580 
10581     // At this point, we know that we perform a cross-register-bank copy.
10582     // Check if it is expensive.
10583     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
10584     // Assume bitcasts are cheap, unless both register classes do not
10585     // explicitly share a common sub class.
10586     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
10587       return false;
10588 
10589     // Check if it will be merged with the load.
10590     // 1. Check the alignment constraint.
10591     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
10592         ResVT.getTypeForEVT(*DAG->getContext()));
10593 
10594     if (RequiredAlignment > getAlignment())
10595       return false;
10596 
10597     // 2. Check that the load is a legal operation for that type.
10598     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
10599       return false;
10600 
10601     // 3. Check that we do not have a zext in the way.
10602     if (Inst->getValueType(0) != getLoadedType())
10603       return false;
10604 
10605     return true;
10606   }
10607 };
10608 }
10609 
10610 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
10611 /// \p UsedBits looks like 0..0 1..1 0..0.
10612 static bool areUsedBitsDense(const APInt &UsedBits) {
10613   // If all the bits are one, this is dense!
10614   if (UsedBits.isAllOnesValue())
10615     return true;
10616 
10617   // Get rid of the unused bits on the right.
10618   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
10619   // Get rid of the unused bits on the left.
10620   if (NarrowedUsedBits.countLeadingZeros())
10621     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
10622   // Check that the chunk of bits is completely used.
10623   return NarrowedUsedBits.isAllOnesValue();
10624 }
10625 
10626 /// \brief Check whether or not \p First and \p Second are next to each other
10627 /// in memory. This means that there is no hole between the bits loaded
10628 /// by \p First and the bits loaded by \p Second.
10629 static bool areSlicesNextToEachOther(const LoadedSlice &First,
10630                                      const LoadedSlice &Second) {
10631   assert(First.Origin == Second.Origin && First.Origin &&
10632          "Unable to match different memory origins.");
10633   APInt UsedBits = First.getUsedBits();
10634   assert((UsedBits & Second.getUsedBits()) == 0 &&
10635          "Slices are not supposed to overlap.");
10636   UsedBits |= Second.getUsedBits();
10637   return areUsedBitsDense(UsedBits);
10638 }
10639 
10640 /// \brief Adjust the \p GlobalLSCost according to the target
10641 /// paring capabilities and the layout of the slices.
10642 /// \pre \p GlobalLSCost should account for at least as many loads as
10643 /// there is in the slices in \p LoadedSlices.
10644 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
10645                                  LoadedSlice::Cost &GlobalLSCost) {
10646   unsigned NumberOfSlices = LoadedSlices.size();
10647   // If there is less than 2 elements, no pairing is possible.
10648   if (NumberOfSlices < 2)
10649     return;
10650 
10651   // Sort the slices so that elements that are likely to be next to each
10652   // other in memory are next to each other in the list.
10653   std::sort(LoadedSlices.begin(), LoadedSlices.end(),
10654             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
10655     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
10656     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
10657   });
10658   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
10659   // First (resp. Second) is the first (resp. Second) potentially candidate
10660   // to be placed in a paired load.
10661   const LoadedSlice *First = nullptr;
10662   const LoadedSlice *Second = nullptr;
10663   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
10664                 // Set the beginning of the pair.
10665                                                            First = Second) {
10666 
10667     Second = &LoadedSlices[CurrSlice];
10668 
10669     // If First is NULL, it means we start a new pair.
10670     // Get to the next slice.
10671     if (!First)
10672       continue;
10673 
10674     EVT LoadedType = First->getLoadedType();
10675 
10676     // If the types of the slices are different, we cannot pair them.
10677     if (LoadedType != Second->getLoadedType())
10678       continue;
10679 
10680     // Check if the target supplies paired loads for this type.
10681     unsigned RequiredAlignment = 0;
10682     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
10683       // move to the next pair, this type is hopeless.
10684       Second = nullptr;
10685       continue;
10686     }
10687     // Check if we meet the alignment requirement.
10688     if (RequiredAlignment > First->getAlignment())
10689       continue;
10690 
10691     // Check that both loads are next to each other in memory.
10692     if (!areSlicesNextToEachOther(*First, *Second))
10693       continue;
10694 
10695     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
10696     --GlobalLSCost.Loads;
10697     // Move to the next pair.
10698     Second = nullptr;
10699   }
10700 }
10701 
10702 /// \brief Check the profitability of all involved LoadedSlice.
10703 /// Currently, it is considered profitable if there is exactly two
10704 /// involved slices (1) which are (2) next to each other in memory, and
10705 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
10706 ///
10707 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
10708 /// the elements themselves.
10709 ///
10710 /// FIXME: When the cost model will be mature enough, we can relax
10711 /// constraints (1) and (2).
10712 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
10713                                 const APInt &UsedBits, bool ForCodeSize) {
10714   unsigned NumberOfSlices = LoadedSlices.size();
10715   if (StressLoadSlicing)
10716     return NumberOfSlices > 1;
10717 
10718   // Check (1).
10719   if (NumberOfSlices != 2)
10720     return false;
10721 
10722   // Check (2).
10723   if (!areUsedBitsDense(UsedBits))
10724     return false;
10725 
10726   // Check (3).
10727   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
10728   // The original code has one big load.
10729   OrigCost.Loads = 1;
10730   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
10731     const LoadedSlice &LS = LoadedSlices[CurrSlice];
10732     // Accumulate the cost of all the slices.
10733     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
10734     GlobalSlicingCost += SliceCost;
10735 
10736     // Account as cost in the original configuration the gain obtained
10737     // with the current slices.
10738     OrigCost.addSliceGain(LS);
10739   }
10740 
10741   // If the target supports paired load, adjust the cost accordingly.
10742   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
10743   return OrigCost > GlobalSlicingCost;
10744 }
10745 
10746 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
10747 /// operations, split it in the various pieces being extracted.
10748 ///
10749 /// This sort of thing is introduced by SROA.
10750 /// This slicing takes care not to insert overlapping loads.
10751 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
10752 bool DAGCombiner::SliceUpLoad(SDNode *N) {
10753   if (Level < AfterLegalizeDAG)
10754     return false;
10755 
10756   LoadSDNode *LD = cast<LoadSDNode>(N);
10757   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
10758       !LD->getValueType(0).isInteger())
10759     return false;
10760 
10761   // Keep track of already used bits to detect overlapping values.
10762   // In that case, we will just abort the transformation.
10763   APInt UsedBits(LD->getValueSizeInBits(0), 0);
10764 
10765   SmallVector<LoadedSlice, 4> LoadedSlices;
10766 
10767   // Check if this load is used as several smaller chunks of bits.
10768   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
10769   // of computation for each trunc.
10770   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
10771        UI != UIEnd; ++UI) {
10772     // Skip the uses of the chain.
10773     if (UI.getUse().getResNo() != 0)
10774       continue;
10775 
10776     SDNode *User = *UI;
10777     unsigned Shift = 0;
10778 
10779     // Check if this is a trunc(lshr).
10780     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
10781         isa<ConstantSDNode>(User->getOperand(1))) {
10782       Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
10783       User = *User->use_begin();
10784     }
10785 
10786     // At this point, User is a Truncate, iff we encountered, trunc or
10787     // trunc(lshr).
10788     if (User->getOpcode() != ISD::TRUNCATE)
10789       return false;
10790 
10791     // The width of the type must be a power of 2 and greater than 8-bits.
10792     // Otherwise the load cannot be represented in LLVM IR.
10793     // Moreover, if we shifted with a non-8-bits multiple, the slice
10794     // will be across several bytes. We do not support that.
10795     unsigned Width = User->getValueSizeInBits(0);
10796     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
10797       return 0;
10798 
10799     // Build the slice for this chain of computations.
10800     LoadedSlice LS(User, LD, Shift, &DAG);
10801     APInt CurrentUsedBits = LS.getUsedBits();
10802 
10803     // Check if this slice overlaps with another.
10804     if ((CurrentUsedBits & UsedBits) != 0)
10805       return false;
10806     // Update the bits used globally.
10807     UsedBits |= CurrentUsedBits;
10808 
10809     // Check if the new slice would be legal.
10810     if (!LS.isLegal())
10811       return false;
10812 
10813     // Record the slice.
10814     LoadedSlices.push_back(LS);
10815   }
10816 
10817   // Abort slicing if it does not seem to be profitable.
10818   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
10819     return false;
10820 
10821   ++SlicedLoads;
10822 
10823   // Rewrite each chain to use an independent load.
10824   // By construction, each chain can be represented by a unique load.
10825 
10826   // Prepare the argument for the new token factor for all the slices.
10827   SmallVector<SDValue, 8> ArgChains;
10828   for (SmallVectorImpl<LoadedSlice>::const_iterator
10829            LSIt = LoadedSlices.begin(),
10830            LSItEnd = LoadedSlices.end();
10831        LSIt != LSItEnd; ++LSIt) {
10832     SDValue SliceInst = LSIt->loadSlice();
10833     CombineTo(LSIt->Inst, SliceInst, true);
10834     if (SliceInst.getOpcode() != ISD::LOAD)
10835       SliceInst = SliceInst.getOperand(0);
10836     assert(SliceInst->getOpcode() == ISD::LOAD &&
10837            "It takes more than a zext to get to the loaded slice!!");
10838     ArgChains.push_back(SliceInst.getValue(1));
10839   }
10840 
10841   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
10842                               ArgChains);
10843   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
10844   return true;
10845 }
10846 
10847 /// Check to see if V is (and load (ptr), imm), where the load is having
10848 /// specific bytes cleared out.  If so, return the byte size being masked out
10849 /// and the shift amount.
10850 static std::pair<unsigned, unsigned>
10851 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
10852   std::pair<unsigned, unsigned> Result(0, 0);
10853 
10854   // Check for the structure we're looking for.
10855   if (V->getOpcode() != ISD::AND ||
10856       !isa<ConstantSDNode>(V->getOperand(1)) ||
10857       !ISD::isNormalLoad(V->getOperand(0).getNode()))
10858     return Result;
10859 
10860   // Check the chain and pointer.
10861   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
10862   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
10863 
10864   // The store should be chained directly to the load or be an operand of a
10865   // tokenfactor.
10866   if (LD == Chain.getNode())
10867     ; // ok.
10868   else if (Chain->getOpcode() != ISD::TokenFactor)
10869     return Result; // Fail.
10870   else {
10871     bool isOk = false;
10872     for (const SDValue &ChainOp : Chain->op_values())
10873       if (ChainOp.getNode() == LD) {
10874         isOk = true;
10875         break;
10876       }
10877     if (!isOk) return Result;
10878   }
10879 
10880   // This only handles simple types.
10881   if (V.getValueType() != MVT::i16 &&
10882       V.getValueType() != MVT::i32 &&
10883       V.getValueType() != MVT::i64)
10884     return Result;
10885 
10886   // Check the constant mask.  Invert it so that the bits being masked out are
10887   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
10888   // follow the sign bit for uniformity.
10889   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
10890   unsigned NotMaskLZ = countLeadingZeros(NotMask);
10891   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
10892   unsigned NotMaskTZ = countTrailingZeros(NotMask);
10893   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
10894   if (NotMaskLZ == 64) return Result;  // All zero mask.
10895 
10896   // See if we have a continuous run of bits.  If so, we have 0*1+0*
10897   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
10898     return Result;
10899 
10900   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
10901   if (V.getValueType() != MVT::i64 && NotMaskLZ)
10902     NotMaskLZ -= 64-V.getValueSizeInBits();
10903 
10904   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
10905   switch (MaskedBytes) {
10906   case 1:
10907   case 2:
10908   case 4: break;
10909   default: return Result; // All one mask, or 5-byte mask.
10910   }
10911 
10912   // Verify that the first bit starts at a multiple of mask so that the access
10913   // is aligned the same as the access width.
10914   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
10915 
10916   Result.first = MaskedBytes;
10917   Result.second = NotMaskTZ/8;
10918   return Result;
10919 }
10920 
10921 
10922 /// Check to see if IVal is something that provides a value as specified by
10923 /// MaskInfo. If so, replace the specified store with a narrower store of
10924 /// truncated IVal.
10925 static SDNode *
10926 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
10927                                 SDValue IVal, StoreSDNode *St,
10928                                 DAGCombiner *DC) {
10929   unsigned NumBytes = MaskInfo.first;
10930   unsigned ByteShift = MaskInfo.second;
10931   SelectionDAG &DAG = DC->getDAG();
10932 
10933   // Check to see if IVal is all zeros in the part being masked in by the 'or'
10934   // that uses this.  If not, this is not a replacement.
10935   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
10936                                   ByteShift*8, (ByteShift+NumBytes)*8);
10937   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
10938 
10939   // Check that it is legal on the target to do this.  It is legal if the new
10940   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
10941   // legalization.
10942   MVT VT = MVT::getIntegerVT(NumBytes*8);
10943   if (!DC->isTypeLegal(VT))
10944     return nullptr;
10945 
10946   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
10947   // shifted by ByteShift and truncated down to NumBytes.
10948   if (ByteShift) {
10949     SDLoc DL(IVal);
10950     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
10951                        DAG.getConstant(ByteShift*8, DL,
10952                                     DC->getShiftAmountTy(IVal.getValueType())));
10953   }
10954 
10955   // Figure out the offset for the store and the alignment of the access.
10956   unsigned StOffset;
10957   unsigned NewAlign = St->getAlignment();
10958 
10959   if (DAG.getDataLayout().isLittleEndian())
10960     StOffset = ByteShift;
10961   else
10962     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
10963 
10964   SDValue Ptr = St->getBasePtr();
10965   if (StOffset) {
10966     SDLoc DL(IVal);
10967     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
10968                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
10969     NewAlign = MinAlign(NewAlign, StOffset);
10970   }
10971 
10972   // Truncate down to the new size.
10973   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
10974 
10975   ++OpsNarrowed;
10976   return DAG
10977       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
10978                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
10979       .getNode();
10980 }
10981 
10982 
10983 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
10984 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
10985 /// narrowing the load and store if it would end up being a win for performance
10986 /// or code size.
10987 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
10988   StoreSDNode *ST  = cast<StoreSDNode>(N);
10989   if (ST->isVolatile())
10990     return SDValue();
10991 
10992   SDValue Chain = ST->getChain();
10993   SDValue Value = ST->getValue();
10994   SDValue Ptr   = ST->getBasePtr();
10995   EVT VT = Value.getValueType();
10996 
10997   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
10998     return SDValue();
10999 
11000   unsigned Opc = Value.getOpcode();
11001 
11002   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
11003   // is a byte mask indicating a consecutive number of bytes, check to see if
11004   // Y is known to provide just those bytes.  If so, we try to replace the
11005   // load + replace + store sequence with a single (narrower) store, which makes
11006   // the load dead.
11007   if (Opc == ISD::OR) {
11008     std::pair<unsigned, unsigned> MaskedLoad;
11009     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
11010     if (MaskedLoad.first)
11011       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
11012                                                   Value.getOperand(1), ST,this))
11013         return SDValue(NewST, 0);
11014 
11015     // Or is commutative, so try swapping X and Y.
11016     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
11017     if (MaskedLoad.first)
11018       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
11019                                                   Value.getOperand(0), ST,this))
11020         return SDValue(NewST, 0);
11021   }
11022 
11023   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
11024       Value.getOperand(1).getOpcode() != ISD::Constant)
11025     return SDValue();
11026 
11027   SDValue N0 = Value.getOperand(0);
11028   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11029       Chain == SDValue(N0.getNode(), 1)) {
11030     LoadSDNode *LD = cast<LoadSDNode>(N0);
11031     if (LD->getBasePtr() != Ptr ||
11032         LD->getPointerInfo().getAddrSpace() !=
11033         ST->getPointerInfo().getAddrSpace())
11034       return SDValue();
11035 
11036     // Find the type to narrow it the load / op / store to.
11037     SDValue N1 = Value.getOperand(1);
11038     unsigned BitWidth = N1.getValueSizeInBits();
11039     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
11040     if (Opc == ISD::AND)
11041       Imm ^= APInt::getAllOnesValue(BitWidth);
11042     if (Imm == 0 || Imm.isAllOnesValue())
11043       return SDValue();
11044     unsigned ShAmt = Imm.countTrailingZeros();
11045     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
11046     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
11047     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
11048     // The narrowing should be profitable, the load/store operation should be
11049     // legal (or custom) and the store size should be equal to the NewVT width.
11050     while (NewBW < BitWidth &&
11051            (NewVT.getStoreSizeInBits() != NewBW ||
11052             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
11053             !TLI.isNarrowingProfitable(VT, NewVT))) {
11054       NewBW = NextPowerOf2(NewBW);
11055       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
11056     }
11057     if (NewBW >= BitWidth)
11058       return SDValue();
11059 
11060     // If the lsb changed does not start at the type bitwidth boundary,
11061     // start at the previous one.
11062     if (ShAmt % NewBW)
11063       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
11064     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
11065                                    std::min(BitWidth, ShAmt + NewBW));
11066     if ((Imm & Mask) == Imm) {
11067       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
11068       if (Opc == ISD::AND)
11069         NewImm ^= APInt::getAllOnesValue(NewBW);
11070       uint64_t PtrOff = ShAmt / 8;
11071       // For big endian targets, we need to adjust the offset to the pointer to
11072       // load the correct bytes.
11073       if (DAG.getDataLayout().isBigEndian())
11074         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
11075 
11076       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
11077       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
11078       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
11079         return SDValue();
11080 
11081       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
11082                                    Ptr.getValueType(), Ptr,
11083                                    DAG.getConstant(PtrOff, SDLoc(LD),
11084                                                    Ptr.getValueType()));
11085       SDValue NewLD =
11086           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
11087                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11088                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
11089       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
11090                                    DAG.getConstant(NewImm, SDLoc(Value),
11091                                                    NewVT));
11092       SDValue NewST =
11093           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
11094                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
11095 
11096       AddToWorklist(NewPtr.getNode());
11097       AddToWorklist(NewLD.getNode());
11098       AddToWorklist(NewVal.getNode());
11099       WorklistRemover DeadNodes(*this);
11100       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
11101       ++OpsNarrowed;
11102       return NewST;
11103     }
11104   }
11105 
11106   return SDValue();
11107 }
11108 
11109 /// For a given floating point load / store pair, if the load value isn't used
11110 /// by any other operations, then consider transforming the pair to integer
11111 /// load / store operations if the target deems the transformation profitable.
11112 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
11113   StoreSDNode *ST  = cast<StoreSDNode>(N);
11114   SDValue Chain = ST->getChain();
11115   SDValue Value = ST->getValue();
11116   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
11117       Value.hasOneUse() &&
11118       Chain == SDValue(Value.getNode(), 1)) {
11119     LoadSDNode *LD = cast<LoadSDNode>(Value);
11120     EVT VT = LD->getMemoryVT();
11121     if (!VT.isFloatingPoint() ||
11122         VT != ST->getMemoryVT() ||
11123         LD->isNonTemporal() ||
11124         ST->isNonTemporal() ||
11125         LD->getPointerInfo().getAddrSpace() != 0 ||
11126         ST->getPointerInfo().getAddrSpace() != 0)
11127       return SDValue();
11128 
11129     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
11130     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
11131         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
11132         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
11133         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
11134       return SDValue();
11135 
11136     unsigned LDAlign = LD->getAlignment();
11137     unsigned STAlign = ST->getAlignment();
11138     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
11139     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
11140     if (LDAlign < ABIAlign || STAlign < ABIAlign)
11141       return SDValue();
11142 
11143     SDValue NewLD =
11144         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
11145                     LD->getPointerInfo(), LDAlign);
11146 
11147     SDValue NewST =
11148         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
11149                      ST->getPointerInfo(), STAlign);
11150 
11151     AddToWorklist(NewLD.getNode());
11152     AddToWorklist(NewST.getNode());
11153     WorklistRemover DeadNodes(*this);
11154     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
11155     ++LdStFP2Int;
11156     return NewST;
11157   }
11158 
11159   return SDValue();
11160 }
11161 
11162 namespace {
11163 /// Helper struct to parse and store a memory address as base + index + offset.
11164 /// We ignore sign extensions when it is safe to do so.
11165 /// The following two expressions are not equivalent. To differentiate we need
11166 /// to store whether there was a sign extension involved in the index
11167 /// computation.
11168 ///  (load (i64 add (i64 copyfromreg %c)
11169 ///                 (i64 signextend (add (i8 load %index)
11170 ///                                      (i8 1))))
11171 /// vs
11172 ///
11173 /// (load (i64 add (i64 copyfromreg %c)
11174 ///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
11175 ///                                         (i32 1)))))
11176 struct BaseIndexOffset {
11177   SDValue Base;
11178   SDValue Index;
11179   int64_t Offset;
11180   bool IsIndexSignExt;
11181 
11182   BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
11183 
11184   BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
11185                   bool IsIndexSignExt) :
11186     Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
11187 
11188   bool equalBaseIndex(const BaseIndexOffset &Other) {
11189     return Other.Base == Base && Other.Index == Index &&
11190       Other.IsIndexSignExt == IsIndexSignExt;
11191   }
11192 
11193   /// Parses tree in Ptr for base, index, offset addresses.
11194   static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
11195     bool IsIndexSignExt = false;
11196 
11197     // Split up a folded GlobalAddress+Offset into its component parts.
11198     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
11199       if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
11200         return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
11201                                                     SDLoc(GA),
11202                                                     GA->getValueType(0),
11203                                                     /*Offset=*/0,
11204                                                     /*isTargetGA=*/false,
11205                                                     GA->getTargetFlags()),
11206                                SDValue(),
11207                                GA->getOffset(),
11208                                IsIndexSignExt);
11209       }
11210 
11211     // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
11212     // instruction, then it could be just the BASE or everything else we don't
11213     // know how to handle. Just use Ptr as BASE and give up.
11214     if (Ptr->getOpcode() != ISD::ADD)
11215       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
11216 
11217     // We know that we have at least an ADD instruction. Try to pattern match
11218     // the simple case of BASE + OFFSET.
11219     if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
11220       int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
11221       return  BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
11222                               IsIndexSignExt);
11223     }
11224 
11225     // Inside a loop the current BASE pointer is calculated using an ADD and a
11226     // MUL instruction. In this case Ptr is the actual BASE pointer.
11227     // (i64 add (i64 %array_ptr)
11228     //          (i64 mul (i64 %induction_var)
11229     //                   (i64 %element_size)))
11230     if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
11231       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
11232 
11233     // Look at Base + Index + Offset cases.
11234     SDValue Base = Ptr->getOperand(0);
11235     SDValue IndexOffset = Ptr->getOperand(1);
11236 
11237     // Skip signextends.
11238     if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
11239       IndexOffset = IndexOffset->getOperand(0);
11240       IsIndexSignExt = true;
11241     }
11242 
11243     // Either the case of Base + Index (no offset) or something else.
11244     if (IndexOffset->getOpcode() != ISD::ADD)
11245       return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
11246 
11247     // Now we have the case of Base + Index + offset.
11248     SDValue Index = IndexOffset->getOperand(0);
11249     SDValue Offset = IndexOffset->getOperand(1);
11250 
11251     if (!isa<ConstantSDNode>(Offset))
11252       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
11253 
11254     // Ignore signextends.
11255     if (Index->getOpcode() == ISD::SIGN_EXTEND) {
11256       Index = Index->getOperand(0);
11257       IsIndexSignExt = true;
11258     } else IsIndexSignExt = false;
11259 
11260     int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
11261     return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
11262   }
11263 };
11264 } // namespace
11265 
11266 // This is a helper function for visitMUL to check the profitability
11267 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
11268 // MulNode is the original multiply, AddNode is (add x, c1),
11269 // and ConstNode is c2.
11270 //
11271 // If the (add x, c1) has multiple uses, we could increase
11272 // the number of adds if we make this transformation.
11273 // It would only be worth doing this if we can remove a
11274 // multiply in the process. Check for that here.
11275 // To illustrate:
11276 //     (A + c1) * c3
11277 //     (A + c2) * c3
11278 // We're checking for cases where we have common "c3 * A" expressions.
11279 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
11280                                               SDValue &AddNode,
11281                                               SDValue &ConstNode) {
11282   APInt Val;
11283 
11284   // If the add only has one use, this would be OK to do.
11285   if (AddNode.getNode()->hasOneUse())
11286     return true;
11287 
11288   // Walk all the users of the constant with which we're multiplying.
11289   for (SDNode *Use : ConstNode->uses()) {
11290 
11291     if (Use == MulNode) // This use is the one we're on right now. Skip it.
11292       continue;
11293 
11294     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
11295       SDNode *OtherOp;
11296       SDNode *MulVar = AddNode.getOperand(0).getNode();
11297 
11298       // OtherOp is what we're multiplying against the constant.
11299       if (Use->getOperand(0) == ConstNode)
11300         OtherOp = Use->getOperand(1).getNode();
11301       else
11302         OtherOp = Use->getOperand(0).getNode();
11303 
11304       // Check to see if multiply is with the same operand of our "add".
11305       //
11306       //     ConstNode  = CONST
11307       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
11308       //     ...
11309       //     AddNode  = (A + c1)  <-- MulVar is A.
11310       //         = AddNode * ConstNode   <-- current visiting instruction.
11311       //
11312       // If we make this transformation, we will have a common
11313       // multiply (ConstNode * A) that we can save.
11314       if (OtherOp == MulVar)
11315         return true;
11316 
11317       // Now check to see if a future expansion will give us a common
11318       // multiply.
11319       //
11320       //     ConstNode  = CONST
11321       //     AddNode    = (A + c1)
11322       //     ...   = AddNode * ConstNode <-- current visiting instruction.
11323       //     ...
11324       //     OtherOp = (A + c2)
11325       //     Use     = OtherOp * ConstNode <-- visiting Use.
11326       //
11327       // If we make this transformation, we will have a common
11328       // multiply (CONST * A) after we also do the same transformation
11329       // to the "t2" instruction.
11330       if (OtherOp->getOpcode() == ISD::ADD &&
11331           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
11332           OtherOp->getOperand(0).getNode() == MulVar)
11333         return true;
11334     }
11335   }
11336 
11337   // Didn't find a case where this would be profitable.
11338   return false;
11339 }
11340 
11341 SDValue DAGCombiner::getMergedConstantVectorStore(
11342     SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores,
11343     SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
11344   SmallVector<SDValue, 8> BuildVector;
11345 
11346   for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
11347     StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
11348     Chains.push_back(St->getChain());
11349     BuildVector.push_back(St->getValue());
11350   }
11351 
11352   return DAG.getBuildVector(Ty, SL, BuildVector);
11353 }
11354 
/// Replace the first NumStores entries of StoreNodes with a single wide store
/// written at the address of StoreNodes[0].
///
/// \param StoreNodes    candidate stores; StoreNodes[0] supplies the base
///                      pointer, pointer info and alignment of the new store.
/// \param MemVT         memory type of each individual store.
/// \param NumStores     number of leading entries of StoreNodes to merge.
/// \param IsConstantSrc true when the stored values are constants.
/// \param UseVector     true to build the merged value as a vector, false to
///                      build it as one wide integer constant.
/// \returns true if the stores were merged; false if there are fewer than two
/// stores or, on the non-constant vector path, a stored value's type differs
/// from MemVT.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
                  SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
                  unsigned NumStores, bool IsConstantSrc, bool UseVector) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  unsigned LatestNodeUsed = 0;

  for (unsigned i=0; i < NumStores; ++i) {
    // Find a chain for the new wide-store operand. Notice that some
    // of the store nodes that we found may not be selected for inclusion
    // in the wide store. The chain we use needs to be the chain of the
    // latest store node which is *used* and replaced by the wide store.
    if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
      LatestNodeUsed = i;
  }

  // Chains of all merged stores; joined by a TokenFactor further down.
  SmallVector<SDValue, 8> Chains;

  // The latest Node in the DAG.
  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
  SDLoc DL(StoreNodes[0].MemNode);

  SDValue StoredVal;
  if (UseVector) {
    bool IsVec = MemVT.isVector();
    unsigned Elts = NumStores;
    if (IsVec) {
      // When merging vector stores, get the total number of elements.
      Elts *= MemVT.getVectorNumElements();
    }
    // Get the type for the merged vector store.
    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
    assert(TLI.isTypeLegal(Ty) && "Illegal vector store");

    if (IsConstantSrc) {
      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = St->getValue();
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
        if (Val.getValueType() != MemVT)
          return false;
        Ops.push_back(Val);
        Chains.push_back(St->getChain());
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
                              DL, Ty, Ops);    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // Each iteration shifts the accumulator left and ORs the next element
      // into the low bits, so the element visited last ends up least
      // significant. Little-endian targets therefore walk the stores in
      // reverse so that StoreNodes[0] lands in the low bits.
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
      Chains.push_back(St->getChain());

      SDValue Val = St->getValue();
      StoreInt <<= ElementSizeBytes * 8;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue().zext(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        // Floating point constants contribute their raw bit pattern.
        StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  assert(!Chains.empty());

  // The wide store is chained to every store it replaces via a TokenFactor.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
                                  FirstInChain->getBasePtr(),
                                  FirstInChain->getPointerInfo(),
                                  FirstInChain->getAlignment());

  // An explicit command-line setting of CombinerAA takes precedence over the
  // subtarget's default.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
  if (UseAA) {
    // Replace all merged stores with the new store.
    for (unsigned i = 0; i < NumStores; ++i)
      CombineTo(StoreNodes[i].MemNode, NewStore);
  } else {
    // Replace the last store with the new store.
    CombineTo(LatestOp, NewStore);
    // Erase all other stores.
    for (unsigned i = 0; i < NumStores; ++i) {
      if (StoreNodes[i].MemNode == LatestOp)
        continue;
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      // ReplaceAllUsesWith will replace all uses that existed when it was
      // called, but graph optimizations may cause new ones to appear. For
      // example, the case in pr14333 looks like
      //
      //  St's chain -> St -> another store -> X
      //
      // And the only difference from St to the other store is the chain.
      // When we change it's chain to be St's chain they become identical,
      // get CSEed and the net result is that X is now a use of St.
      // Since we know that St is redundant, just iterate.
      while (!St->use_empty())
        DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
      deleteAndRecombine(St);
    }
  }

  return true;
}
11482 
11483 void DAGCombiner::getStoreMergeAndAliasCandidates(
11484     StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
11485     SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
11486   // This holds the base pointer, index, and the offset in bytes from the base
11487   // pointer.
11488   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
11489 
11490   // We must have a base and an offset.
11491   if (!BasePtr.Base.getNode())
11492     return;
11493 
11494   // Do not handle stores to undef base pointers.
11495   if (BasePtr.Base.isUndef())
11496     return;
11497 
11498   // Walk up the chain and look for nodes with offsets from the same
11499   // base pointer. Stop when reaching an instruction with a different kind
11500   // or instruction which has a different base pointer.
11501   EVT MemVT = St->getMemoryVT();
11502   unsigned Seq = 0;
11503   StoreSDNode *Index = St;
11504 
11505 
11506   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11507                                                   : DAG.getSubtarget().useAA();
11508 
11509   if (UseAA) {
11510     // Look at other users of the same chain. Stores on the same chain do not
11511     // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
11512     // to be on the same chain, so don't bother looking at adjacent chains.
11513 
11514     SDValue Chain = St->getChain();
11515     for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
11516       if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
11517         if (I.getOperandNo() != 0)
11518           continue;
11519 
11520         if (OtherST->isVolatile() || OtherST->isIndexed())
11521           continue;
11522 
11523         if (OtherST->getMemoryVT() != MemVT)
11524           continue;
11525 
11526         BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
11527 
11528         if (Ptr.equalBaseIndex(BasePtr))
11529           StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
11530       }
11531     }
11532 
11533     return;
11534   }
11535 
11536   while (Index) {
11537     // If the chain has more than one use, then we can't reorder the mem ops.
11538     if (Index != St && !SDValue(Index, 0)->hasOneUse())
11539       break;
11540 
11541     // Find the base pointer and offset for this memory node.
11542     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
11543 
11544     // Check that the base pointer is the same as the original one.
11545     if (!Ptr.equalBaseIndex(BasePtr))
11546       break;
11547 
11548     // The memory operands must not be volatile.
11549     if (Index->isVolatile() || Index->isIndexed())
11550       break;
11551 
11552     // No truncation.
11553     if (Index->isTruncatingStore())
11554       break;
11555 
11556     // The stored memory type must be the same.
11557     if (Index->getMemoryVT() != MemVT)
11558       break;
11559 
11560     // We do not allow under-aligned stores in order to prevent
11561     // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
11562     // be irrelevant here; what MATTERS is that we not move memory
11563     // operations that potentially overlap past each-other.
11564     if (Index->getAlignment() < MemVT.getStoreSize())
11565       break;
11566 
11567     // We found a potential memory operand to merge.
11568     StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
11569 
11570     // Find the next memory operand in the chain. If the next operand in the
11571     // chain is a store then move up and continue the scan with the next
11572     // memory operand. If the next operand is a load save it and use alias
11573     // information to check if it interferes with anything.
11574     SDNode *NextInChain = Index->getChain().getNode();
11575     while (1) {
11576       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
11577         // We found a store node. Use it for the next iteration.
11578         Index = STn;
11579         break;
11580       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
11581         if (Ldn->isVolatile()) {
11582           Index = nullptr;
11583           break;
11584         }
11585 
11586         // Save the load node for later. Continue the scan.
11587         AliasLoadNodes.push_back(Ldn);
11588         NextInChain = Ldn->getChain().getNode();
11589         continue;
11590       } else {
11591         Index = nullptr;
11592         break;
11593       }
11594     }
11595   }
11596 }
11597 
11598 // We need to check that merging these stores does not cause a loop
11599 // in the DAG. Any store candidate may depend on another candidate
11600 // indirectly through its operand (we already consider dependencies
11601 // through the chain). Check in parallel by searching up from
11602 // non-chain operands of candidates.
11603 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
11604     SmallVectorImpl<MemOpLink> &StoreNodes) {
11605   SmallPtrSet<const SDNode *, 16> Visited;
11606   SmallVector<const SDNode *, 8> Worklist;
11607   // search ops of store candidates
11608   for (unsigned i = 0; i < StoreNodes.size(); ++i) {
11609     SDNode *n = StoreNodes[i].MemNode;
11610     // Potential loops may happen only through non-chain operands
11611     for (unsigned j = 1; j < n->getNumOperands(); ++j)
11612       Worklist.push_back(n->getOperand(j).getNode());
11613   }
11614   // search through DAG. We can stop early if we find a storenode
11615   for (unsigned i = 0; i < StoreNodes.size(); ++i) {
11616     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
11617       return false;
11618   }
11619   return true;
11620 }
11621 
11622 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
11623   if (OptLevel == CodeGenOpt::None)
11624     return false;
11625 
11626   EVT MemVT = St->getMemoryVT();
11627   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
11628   bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
11629       Attribute::NoImplicitFloat);
11630 
11631   // This function cannot currently deal with non-byte-sized memory sizes.
11632   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
11633     return false;
11634 
11635   if (!MemVT.isSimple())
11636     return false;
11637 
11638   // Perform an early exit check. Do not bother looking at stored values that
11639   // are not constants, loads, or extracted vector elements.
11640   SDValue StoredVal = St->getValue();
11641   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
11642   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
11643                        isa<ConstantFPSDNode>(StoredVal);
11644   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
11645                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
11646 
11647   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
11648     return false;
11649 
11650   // Don't merge vectors into wider vectors if the source data comes from loads.
11651   // TODO: This restriction can be lifted by using logic similar to the
11652   // ExtractVecSrc case.
11653   if (MemVT.isVector() && IsLoadSrc)
11654     return false;
11655 
11656   // Only look at ends of store sequences.
11657   SDValue Chain = SDValue(St, 0);
11658   if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
11659     return false;
11660 
11661   // Save the LoadSDNodes that we find in the chain.
11662   // We need to make sure that these nodes do not interfere with
11663   // any of the store nodes.
11664   SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
11665 
11666   // Save the StoreSDNodes that we find in the chain.
11667   SmallVector<MemOpLink, 8> StoreNodes;
11668 
11669   getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
11670 
11671   // Check if there is anything to merge.
11672   if (StoreNodes.size() < 2)
11673     return false;
11674 
11675   // only do dependence check in AA case
11676   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11677                                                   : DAG.getSubtarget().useAA();
11678   if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
11679     return false;
11680 
11681   // Sort the memory operands according to their distance from the
11682   // base pointer.  As a secondary criteria: make sure stores coming
11683   // later in the code come first in the list. This is important for
11684   // the non-UseAA case, because we're merging stores into the FINAL
11685   // store along a chain which potentially contains aliasing stores.
11686   // Thus, if there are multiple stores to the same address, the last
11687   // one can be considered for merging but not the others.
11688   std::sort(StoreNodes.begin(), StoreNodes.end(),
11689             [](MemOpLink LHS, MemOpLink RHS) {
11690     return LHS.OffsetFromBase < RHS.OffsetFromBase ||
11691            (LHS.OffsetFromBase == RHS.OffsetFromBase &&
11692             LHS.SequenceNum < RHS.SequenceNum);
11693   });
11694 
11695   // Scan the memory operations on the chain and find the first non-consecutive
11696   // store memory address.
11697   unsigned LastConsecutiveStore = 0;
11698   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
11699   for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
11700 
11701     // Check that the addresses are consecutive starting from the second
11702     // element in the list of stores.
11703     if (i > 0) {
11704       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
11705       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
11706         break;
11707     }
11708 
11709     // Check if this store interferes with any of the loads that we found.
11710     // If we find a load that alias with this store. Stop the sequence.
11711     if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
11712           return isAlias(Ldn, StoreNodes[i].MemNode);
11713         }))
11714       break;
11715 
11716     // Mark this node as useful.
11717     LastConsecutiveStore = i;
11718   }
11719 
11720   // The node with the lowest store address.
11721   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
11722   unsigned FirstStoreAS = FirstInChain->getAddressSpace();
11723   unsigned FirstStoreAlign = FirstInChain->getAlignment();
11724   LLVMContext &Context = *DAG.getContext();
11725   const DataLayout &DL = DAG.getDataLayout();
11726 
11727   // Store the constants into memory as one consecutive store.
11728   if (IsConstantSrc) {
11729     unsigned LastLegalType = 0;
11730     unsigned LastLegalVectorType = 0;
11731     bool NonZero = false;
11732     for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
11733       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
11734       SDValue StoredVal = St->getValue();
11735 
11736       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
11737         NonZero |= !C->isNullValue();
11738       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
11739         NonZero |= !C->getConstantFPValue()->isNullValue();
11740       } else {
11741         // Non-constant.
11742         break;
11743       }
11744 
11745       // Find a legal type for the constant store.
11746       unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
11747       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
11748       bool IsFast;
11749       if (TLI.isTypeLegal(StoreTy) &&
11750           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11751                                  FirstStoreAlign, &IsFast) && IsFast) {
11752         LastLegalType = i+1;
11753       // Or check whether a truncstore is legal.
11754       } else if (TLI.getTypeAction(Context, StoreTy) ==
11755                  TargetLowering::TypePromoteInteger) {
11756         EVT LegalizedStoredValueTy =
11757           TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
11758         if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
11759             TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11760                                    FirstStoreAS, FirstStoreAlign, &IsFast) &&
11761             IsFast) {
11762           LastLegalType = i + 1;
11763         }
11764       }
11765 
11766       // We only use vectors if the constant is known to be zero or the target
11767       // allows it and the function is not marked with the noimplicitfloat
11768       // attribute.
11769       if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
11770                                                         FirstStoreAS)) &&
11771           !NoVectors) {
11772         // Find a legal type for the vector store.
11773         EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
11774         if (TLI.isTypeLegal(Ty) &&
11775             TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
11776                                    FirstStoreAlign, &IsFast) && IsFast)
11777           LastLegalVectorType = i + 1;
11778       }
11779     }
11780 
11781     // Check if we found a legal integer type to store.
11782     if (LastLegalType == 0 && LastLegalVectorType == 0)
11783       return false;
11784 
11785     bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
11786     unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
11787 
11788     return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
11789                                            true, UseVector);
11790   }
11791 
11792   // When extracting multiple vector elements, try to store them
11793   // in one vector store rather than a sequence of scalar stores.
11794   if (IsExtractVecSrc) {
11795     unsigned NumStoresToMerge = 0;
11796     bool IsVec = MemVT.isVector();
11797     for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
11798       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
11799       unsigned StoreValOpcode = St->getValue().getOpcode();
11800       // This restriction could be loosened.
11801       // Bail out if any stored values are not elements extracted from a vector.
11802       // It should be possible to handle mixed sources, but load sources need
11803       // more careful handling (see the block of code below that handles
11804       // consecutive loads).
11805       if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
11806           StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
11807         return false;
11808 
11809       // Find a legal type for the vector store.
11810       unsigned Elts = i + 1;
11811       if (IsVec) {
11812         // When merging vector stores, get the total number of elements.
11813         Elts *= MemVT.getVectorNumElements();
11814       }
11815       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
11816       bool IsFast;
11817       if (TLI.isTypeLegal(Ty) &&
11818           TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
11819                                  FirstStoreAlign, &IsFast) && IsFast)
11820         NumStoresToMerge = i + 1;
11821     }
11822 
11823     return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
11824                                            false, true);
11825   }
11826 
11827   // Below we handle the case of multiple consecutive stores that
11828   // come from multiple consecutive loads. We merge them into a single
11829   // wide load and a single wide store.
11830 
11831   // Look for load nodes which are used by the stored values.
11832   SmallVector<MemOpLink, 8> LoadNodes;
11833 
11834   // Find acceptable loads. Loads need to have the same chain (token factor),
11835   // must not be zext, volatile, indexed, and they must be consecutive.
11836   BaseIndexOffset LdBasePtr;
11837   for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
11838     StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
11839     LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
11840     if (!Ld) break;
11841 
11842     // Loads must only have one use.
11843     if (!Ld->hasNUsesOfValue(1, 0))
11844       break;
11845 
11846     // The memory operands must not be volatile.
11847     if (Ld->isVolatile() || Ld->isIndexed())
11848       break;
11849 
11850     // We do not accept ext loads.
11851     if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
11852       break;
11853 
11854     // The stored memory type must be the same.
11855     if (Ld->getMemoryVT() != MemVT)
11856       break;
11857 
11858     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
11859     // If this is not the first ptr that we check.
11860     if (LdBasePtr.Base.getNode()) {
11861       // The base ptr must be the same.
11862       if (!LdPtr.equalBaseIndex(LdBasePtr))
11863         break;
11864     } else {
11865       // Check that all other base pointers are the same as this one.
11866       LdBasePtr = LdPtr;
11867     }
11868 
11869     // We found a potential memory operand to merge.
11870     LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
11871   }
11872 
11873   if (LoadNodes.size() < 2)
11874     return false;
11875 
11876   // If we have load/store pair instructions and we only have two values,
11877   // don't bother.
11878   unsigned RequiredAlignment;
11879   if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
11880       St->getAlignment() >= RequiredAlignment)
11881     return false;
11882 
11883   LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
11884   unsigned FirstLoadAS = FirstLoad->getAddressSpace();
11885   unsigned FirstLoadAlign = FirstLoad->getAlignment();
11886 
11887   // Scan the memory operations on the chain and find the first non-consecutive
11888   // load memory address. These variables hold the index in the store node
11889   // array.
11890   unsigned LastConsecutiveLoad = 0;
11891   // This variable refers to the size and not index in the array.
11892   unsigned LastLegalVectorType = 0;
11893   unsigned LastLegalIntegerType = 0;
11894   StartAddress = LoadNodes[0].OffsetFromBase;
11895   SDValue FirstChain = FirstLoad->getChain();
11896   for (unsigned i = 1; i < LoadNodes.size(); ++i) {
11897     // All loads must share the same chain.
11898     if (LoadNodes[i].MemNode->getChain() != FirstChain)
11899       break;
11900 
11901     int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
11902     if (CurrAddress - StartAddress != (ElementSizeBytes * i))
11903       break;
11904     LastConsecutiveLoad = i;
11905     // Find a legal type for the vector store.
11906     EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
11907     bool IsFastSt, IsFastLd;
11908     if (TLI.isTypeLegal(StoreTy) &&
11909         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11910                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
11911         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
11912                                FirstLoadAlign, &IsFastLd) && IsFastLd) {
11913       LastLegalVectorType = i + 1;
11914     }
11915 
11916     // Find a legal type for the integer store.
11917     unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
11918     StoreTy = EVT::getIntegerVT(Context, SizeInBits);
11919     if (TLI.isTypeLegal(StoreTy) &&
11920         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11921                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
11922         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
11923                                FirstLoadAlign, &IsFastLd) && IsFastLd)
11924       LastLegalIntegerType = i + 1;
11925     // Or check whether a truncstore and extload is legal.
11926     else if (TLI.getTypeAction(Context, StoreTy) ==
11927              TargetLowering::TypePromoteInteger) {
11928       EVT LegalizedStoredValueTy =
11929         TLI.getTypeToTransformTo(Context, StoreTy);
11930       if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
11931           TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11932           TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11933           TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11934           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11935                                  FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
11936           IsFastSt &&
11937           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11938                                  FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
11939           IsFastLd)
11940         LastLegalIntegerType = i+1;
11941     }
11942   }
11943 
11944   // Only use vector types if the vector type is larger than the integer type.
11945   // If they are the same, use integers.
11946   bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
11947   unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
11948 
11949   // We add +1 here because the LastXXX variables refer to location while
11950   // the NumElem refers to array/index size.
11951   unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
11952   NumElem = std::min(LastLegalType, NumElem);
11953 
11954   if (NumElem < 2)
11955     return false;
11956 
11957   // Collect the chains from all merged stores.
11958   SmallVector<SDValue, 8> MergeStoreChains;
11959   MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
11960 
11961   // The latest Node in the DAG.
11962   unsigned LatestNodeUsed = 0;
11963   for (unsigned i=1; i<NumElem; ++i) {
11964     // Find a chain for the new wide-store operand. Notice that some
11965     // of the store nodes that we found may not be selected for inclusion
11966     // in the wide store. The chain we use needs to be the chain of the
11967     // latest store node which is *used* and replaced by the wide store.
11968     if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
11969       LatestNodeUsed = i;
11970 
11971     MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
11972   }
11973 
11974   LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
11975 
11976   // Find if it is better to use vectors or integers to load and store
11977   // to memory.
11978   EVT JointMemOpVT;
11979   if (UseVectorTy) {
11980     JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
11981   } else {
11982     unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
11983     JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
11984   }
11985 
11986   SDLoc LoadDL(LoadNodes[0].MemNode);
11987   SDLoc StoreDL(StoreNodes[0].MemNode);
11988 
11989   // The merged loads are required to have the same incoming chain, so
11990   // using the first's chain is acceptable.
11991   SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
11992                                 FirstLoad->getBasePtr(),
11993                                 FirstLoad->getPointerInfo(), FirstLoadAlign);
11994 
11995   SDValue NewStoreChain =
11996     DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
11997 
11998   SDValue NewStore =
11999       DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
12000                    FirstInChain->getPointerInfo(), FirstStoreAlign);
12001 
12002   // Transfer chain users from old loads to the new load.
12003   for (unsigned i = 0; i < NumElem; ++i) {
12004     LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
12005     DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
12006                                   SDValue(NewLoad.getNode(), 1));
12007   }
12008 
12009   if (UseAA) {
12010     // Replace the all stores with the new store.
12011     for (unsigned i = 0; i < NumElem; ++i)
12012       CombineTo(StoreNodes[i].MemNode, NewStore);
12013   } else {
12014     // Replace the last store with the new store.
12015     CombineTo(LatestOp, NewStore);
12016     // Erase all other stores.
12017     for (unsigned i = 0; i < NumElem; ++i) {
12018       // Remove all Store nodes.
12019       if (StoreNodes[i].MemNode == LatestOp)
12020         continue;
12021       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12022       DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
12023       deleteAndRecombine(St);
12024     }
12025   }
12026 
12027   return true;
12028 }
12029 
12030 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
12031   SDLoc SL(ST);
12032   SDValue ReplStore;
12033 
12034   // Replace the chain to avoid dependency.
12035   if (ST->isTruncatingStore()) {
12036     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
12037                                   ST->getBasePtr(), ST->getMemoryVT(),
12038                                   ST->getMemOperand());
12039   } else {
12040     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
12041                              ST->getMemOperand());
12042   }
12043 
12044   // Create token to keep both nodes around.
12045   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
12046                               MVT::Other, ST->getChain(), ReplStore);
12047 
12048   // Make sure the new and old chains are cleaned up.
12049   AddToWorklist(Token.getNode());
12050 
12051   // Don't add users to work list.
12052   return CombineTo(ST, Token, false);
12053 }
12054 
12055 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
12056   SDValue Value = ST->getValue();
12057   if (Value.getOpcode() == ISD::TargetConstantFP)
12058     return SDValue();
12059 
12060   SDLoc DL(ST);
12061 
12062   SDValue Chain = ST->getChain();
12063   SDValue Ptr = ST->getBasePtr();
12064 
12065   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
12066 
12067   // NOTE: If the original store is volatile, this transform must not increase
12068   // the number of stores.  For example, on x86-32 an f64 can be stored in one
12069   // processor operation but an i64 (which is not legal) requires two.  So the
12070   // transform should not be done in this case.
12071 
12072   SDValue Tmp;
12073   switch (CFP->getSimpleValueType(0).SimpleTy) {
12074   default:
12075     llvm_unreachable("Unknown FP type");
12076   case MVT::f16:    // We don't do this for these yet.
12077   case MVT::f80:
12078   case MVT::f128:
12079   case MVT::ppcf128:
12080     return SDValue();
12081   case MVT::f32:
12082     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
12083         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
12084       ;
12085       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
12086                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
12087                             MVT::i32);
12088       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
12089     }
12090 
12091     return SDValue();
12092   case MVT::f64:
12093     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
12094          !ST->isVolatile()) ||
12095         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
12096       ;
12097       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
12098                             getZExtValue(), SDLoc(CFP), MVT::i64);
12099       return DAG.getStore(Chain, DL, Tmp,
12100                           Ptr, ST->getMemOperand());
12101     }
12102 
12103     if (!ST->isVolatile() &&
12104         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
12105       // Many FP stores are not made apparent until after legalize, e.g. for
12106       // argument passing.  Since this is so common, custom legalize the
12107       // 64-bit integer store into two 32-bit stores.
12108       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
12109       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
12110       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
12111       if (DAG.getDataLayout().isBigEndian())
12112         std::swap(Lo, Hi);
12113 
12114       unsigned Alignment = ST->getAlignment();
12115       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
12116       AAMDNodes AAInfo = ST->getAAInfo();
12117 
12118       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
12119                                  ST->getAlignment(), MMOFlags, AAInfo);
12120       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
12121                         DAG.getConstant(4, DL, Ptr.getValueType()));
12122       Alignment = MinAlign(Alignment, 4U);
12123       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
12124                                  ST->getPointerInfo().getWithOffset(4),
12125                                  Alignment, MMOFlags, AAInfo);
12126       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
12127                          St0, St1);
12128     }
12129 
12130     return SDValue();
12131   }
12132 }
12133 
12134 SDValue DAGCombiner::visitSTORE(SDNode *N) {
12135   StoreSDNode *ST  = cast<StoreSDNode>(N);
12136   SDValue Chain = ST->getChain();
12137   SDValue Value = ST->getValue();
12138   SDValue Ptr   = ST->getBasePtr();
12139 
12140   // If this is a store of a bit convert, store the input value if the
12141   // resultant store does not need a higher alignment than the original.
12142   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
12143       ST->isUnindexed()) {
12144     EVT SVT = Value.getOperand(0).getValueType();
12145     if (((!LegalOperations && !ST->isVolatile()) ||
12146          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
12147         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
12148       unsigned OrigAlign = ST->getAlignment();
12149       bool Fast = false;
12150       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
12151                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
12152           Fast) {
12153         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12154                             ST->getPointerInfo(), OrigAlign,
12155                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
12156       }
12157     }
12158   }
12159 
12160   // Turn 'store undef, Ptr' -> nothing.
12161   if (Value.isUndef() && ST->isUnindexed())
12162     return Chain;
12163 
12164   // Try to infer better alignment information than the store already has.
12165   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
12166     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
12167       if (Align > ST->getAlignment()) {
12168         SDValue NewStore =
12169             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
12170                               ST->getMemoryVT(), Align,
12171                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
12172         if (NewStore.getNode() != N)
12173           return CombineTo(ST, NewStore, true);
12174       }
12175     }
12176   }
12177 
12178   // Try transforming a pair floating point load / store ops to integer
12179   // load / store ops.
12180   if (SDValue NewST = TransformFPLoadStorePair(N))
12181     return NewST;
12182 
12183   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
12184                                                   : DAG.getSubtarget().useAA();
12185 #ifndef NDEBUG
12186   if (CombinerAAOnlyFunc.getNumOccurrences() &&
12187       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
12188     UseAA = false;
12189 #endif
12190   if (UseAA && ST->isUnindexed()) {
12191     // FIXME: We should do this even without AA enabled. AA will just allow
12192     // FindBetterChain to work in more situations. The problem with this is that
12193     // any combine that expects memory operations to be on consecutive chains
12194     // first needs to be updated to look for users of the same chain.
12195 
12196     // Walk up chain skipping non-aliasing memory nodes, on this store and any
12197     // adjacent stores.
12198     if (findBetterNeighborChains(ST)) {
12199       // replaceStoreChain uses CombineTo, which handled all of the worklist
12200       // manipulation. Return the original node to not do anything else.
12201       return SDValue(ST, 0);
12202     }
12203     Chain = ST->getChain();
12204   }
12205 
12206   // Try transforming N to an indexed store.
12207   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12208     return SDValue(N, 0);
12209 
12210   // FIXME: is there such a thing as a truncating indexed store?
12211   if (ST->isTruncatingStore() && ST->isUnindexed() &&
12212       Value.getValueType().isInteger()) {
12213     // See if we can simplify the input to this truncstore with knowledge that
12214     // only the low bits are being used.  For example:
12215     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
12216     SDValue Shorter = GetDemandedBits(
12217         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12218                                     ST->getMemoryVT().getScalarSizeInBits()));
12219     AddToWorklist(Value.getNode());
12220     if (Shorter.getNode())
12221       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
12222                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
12223 
12224     // Otherwise, see if we can simplify the operation with
12225     // SimplifyDemandedBits, which only works if the value has a single use.
12226     if (SimplifyDemandedBits(
12227             Value,
12228             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12229                                  ST->getMemoryVT().getScalarSizeInBits())))
12230       return SDValue(N, 0);
12231   }
12232 
12233   // If this is a load followed by a store to the same location, then the store
12234   // is dead/noop.
12235   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
12236     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
12237         ST->isUnindexed() && !ST->isVolatile() &&
12238         // There can't be any side effects between the load and store, such as
12239         // a call or store.
12240         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
12241       // The store is dead, remove it.
12242       return Chain;
12243     }
12244   }
12245 
12246   // If this is a store followed by a store with the same value to the same
12247   // location, then the store is dead/noop.
12248   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
12249     if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
12250         ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
12251         ST1->isUnindexed() && !ST1->isVolatile()) {
12252       // The store is dead, remove it.
12253       return Chain;
12254     }
12255   }
12256 
12257   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
12258   // truncating store.  We can do this even if this is already a truncstore.
12259   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
12260       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
12261       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
12262                             ST->getMemoryVT())) {
12263     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
12264                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
12265   }
12266 
12267   // Only perform this optimization before the types are legal, because we
12268   // don't want to perform this optimization on every DAGCombine invocation.
12269   if (!LegalTypes) {
12270     bool EverChanged = false;
12271 
12272     do {
12273       // There can be multiple store sequences on the same chain.
12274       // Keep trying to merge store sequences until we are unable to do so
12275       // or until we merge the last store on the chain.
12276       bool Changed = MergeConsecutiveStores(ST);
12277       EverChanged |= Changed;
12278       if (!Changed) break;
12279     } while (ST->getOpcode() != ISD::DELETED_NODE);
12280 
12281     if (EverChanged)
12282       return SDValue(N, 0);
12283   }
12284 
12285   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
12286   //
12287   // Make sure to do this only after attempting to merge stores in order to
12288   //  avoid changing the types of some subset of stores due to visit order,
12289   //  preventing their merging.
12290   if (isa<ConstantFPSDNode>(Value)) {
12291     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
12292       return NewSt;
12293   }
12294 
12295   if (SDValue NewSt = splitMergedValStore(ST))
12296     return NewSt;
12297 
12298   return ReduceLoadOpStoreWidth(N);
12299 }
12300 
12301 /// For the instruction sequence of store below, F and I values
12302 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
12304 /// which can remove the bitwise instructions or sink them to colder places.
12305 ///
12306 ///   (store (or (zext (bitcast F to i32) to i64),
12307 ///              (shl (zext I to i64), 32)), addr)  -->
12308 ///   (store F, addr) and (store I, addr+4)
12309 ///
12310 /// Similarly, splitting for other merged store can also be beneficial, like:
12311 /// For pair of {i32, i32}, i64 store --> two i32 stores.
12312 /// For pair of {i32, i16}, i64 store --> two i32 stores.
12313 /// For pair of {i16, i16}, i32 store --> two i16 stores.
12314 /// For pair of {i16, i8},  i32 store --> two i16 stores.
12315 /// For pair of {i8, i8},   i16 store --> two i8 stores.
12316 ///
12317 /// We allow each target to determine specifically which kind of splitting is
12318 /// supported.
12319 ///
12320 /// The store patterns are commonly seen from the simple code snippet below
12321 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
12322 ///   void goo(const std::pair<int, float> &);
12323 ///   hoo() {
12324 ///     ...
12325 ///     goo(std::make_pair(tmp, ftmp));
12326 ///     ...
12327 ///   }
12328 ///
12329 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
12330   if (OptLevel == CodeGenOpt::None)
12331     return SDValue();
12332 
12333   SDValue Val = ST->getValue();
12334   SDLoc DL(ST);
12335 
12336   // Match OR operand.
12337   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
12338     return SDValue();
12339 
12340   // Match SHL operand and get Lower and Higher parts of Val.
12341   SDValue Op1 = Val.getOperand(0);
12342   SDValue Op2 = Val.getOperand(1);
12343   SDValue Lo, Hi;
12344   if (Op1.getOpcode() != ISD::SHL) {
12345     std::swap(Op1, Op2);
12346     if (Op1.getOpcode() != ISD::SHL)
12347       return SDValue();
12348   }
12349   Lo = Op2;
12350   Hi = Op1.getOperand(0);
12351   if (!Op1.hasOneUse())
12352     return SDValue();
12353 
12354   // Match shift amount to HalfValBitSize.
12355   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
12356   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
12357   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
12358     return SDValue();
12359 
12360   // Lo and Hi are zero-extended from int with size less equal than 32
12361   // to i64.
12362   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
12363       !Lo.getOperand(0).getValueType().isScalarInteger() ||
12364       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
12365       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
12366       !Hi.getOperand(0).getValueType().isScalarInteger() ||
12367       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
12368     return SDValue();
12369 
12370   if (!TLI.isMultiStoresCheaperThanBitsMerge(Lo.getOperand(0),
12371                                              Hi.getOperand(0)))
12372     return SDValue();
12373 
12374   // Start to split store.
12375   unsigned Alignment = ST->getAlignment();
12376   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
12377   AAMDNodes AAInfo = ST->getAAInfo();
12378 
12379   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
12380   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
12381   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
12382   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
12383 
12384   SDValue Chain = ST->getChain();
12385   SDValue Ptr = ST->getBasePtr();
12386   // Lower value store.
12387   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
12388                              ST->getAlignment(), MMOFlags, AAInfo);
12389   Ptr =
12390       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
12391                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
12392   // Higher value store.
12393   SDValue St1 =
12394       DAG.getStore(St0, DL, Hi, Ptr,
12395                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
12396                    Alignment / 2, MMOFlags, AAInfo);
12397   return St1;
12398 }
12399 
12400 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
12401   SDValue InVec = N->getOperand(0);
12402   SDValue InVal = N->getOperand(1);
12403   SDValue EltNo = N->getOperand(2);
12404   SDLoc DL(N);
12405 
12406   // If the inserted element is an UNDEF, just use the input vector.
12407   if (InVal.isUndef())
12408     return InVec;
12409 
12410   EVT VT = InVec.getValueType();
12411 
12412   // If we can't generate a legal BUILD_VECTOR, exit
12413   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
12414     return SDValue();
12415 
12416   // Check that we know which element is being inserted
12417   if (!isa<ConstantSDNode>(EltNo))
12418     return SDValue();
12419   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12420 
12421   // Canonicalize insert_vector_elt dag nodes.
12422   // Example:
12423   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
12424   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
12425   //
12426   // Do this only if the child insert_vector node has one use; also
12427   // do this only if indices are both constants and Idx1 < Idx0.
12428   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
12429       && isa<ConstantSDNode>(InVec.getOperand(2))) {
12430     unsigned OtherElt =
12431       cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
12432     if (Elt < OtherElt) {
12433       // Swap nodes.
12434       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
12435                                   InVec.getOperand(0), InVal, EltNo);
12436       AddToWorklist(NewOp.getNode());
12437       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
12438                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
12439     }
12440   }
12441 
12442   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
12443   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
12444   // vector elements.
12445   SmallVector<SDValue, 8> Ops;
12446   // Do not combine these two vectors if the output vector will not replace
12447   // the input vector.
12448   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
12449     Ops.append(InVec.getNode()->op_begin(),
12450                InVec.getNode()->op_end());
12451   } else if (InVec.isUndef()) {
12452     unsigned NElts = VT.getVectorNumElements();
12453     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
12454   } else {
12455     return SDValue();
12456   }
12457 
12458   // Insert the element
12459   if (Elt < Ops.size()) {
12460     // All the operands of BUILD_VECTOR must have the same type;
12461     // we enforce that here.
12462     EVT OpVT = Ops[0].getValueType();
12463     if (InVal.getValueType() != OpVT)
12464       InVal = OpVT.bitsGT(InVal.getValueType()) ?
12465                 DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
12466                 DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
12467     Ops[Elt] = InVal;
12468   }
12469 
12470   // Return the new vector
12471   return DAG.getBuildVector(VT, DL, Ops);
12472 }
12473 
12474 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
12475     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
12476   assert(!OriginalLoad->isVolatile());
12477 
12478   EVT ResultVT = EVE->getValueType(0);
12479   EVT VecEltVT = InVecVT.getVectorElementType();
12480   unsigned Align = OriginalLoad->getAlignment();
12481   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
12482       VecEltVT.getTypeForEVT(*DAG.getContext()));
12483 
12484   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
12485     return SDValue();
12486 
12487   Align = NewAlign;
12488 
12489   SDValue NewPtr = OriginalLoad->getBasePtr();
12490   SDValue Offset;
12491   EVT PtrType = NewPtr.getValueType();
12492   MachinePointerInfo MPI;
12493   SDLoc DL(EVE);
12494   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
12495     int Elt = ConstEltNo->getZExtValue();
12496     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
12497     Offset = DAG.getConstant(PtrOff, DL, PtrType);
12498     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
12499   } else {
12500     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
12501     Offset = DAG.getNode(
12502         ISD::MUL, DL, PtrType, Offset,
12503         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
12504     MPI = OriginalLoad->getPointerInfo();
12505   }
12506   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
12507 
12508   // The replacement we need to do here is a little tricky: we need to
12509   // replace an extractelement of a load with a load.
12510   // Use ReplaceAllUsesOfValuesWith to do the replacement.
12511   // Note that this replacement assumes that the extractvalue is the only
12512   // use of the load; that's okay because we don't want to perform this
12513   // transformation in other cases anyway.
12514   SDValue Load;
12515   SDValue Chain;
12516   if (ResultVT.bitsGT(VecEltVT)) {
12517     // If the result type of vextract is wider than the load, then issue an
12518     // extending load instead.
12519     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
12520                                                   VecEltVT)
12521                                    ? ISD::ZEXTLOAD
12522                                    : ISD::EXTLOAD;
12523     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
12524                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
12525                           Align, OriginalLoad->getMemOperand()->getFlags(),
12526                           OriginalLoad->getAAInfo());
12527     Chain = Load.getValue(1);
12528   } else {
12529     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
12530                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
12531                        OriginalLoad->getAAInfo());
12532     Chain = Load.getValue(1);
12533     if (ResultVT.bitsLT(VecEltVT))
12534       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
12535     else
12536       Load = DAG.getBitcast(ResultVT, Load);
12537   }
12538   WorklistRemover DeadNodes(*this);
12539   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
12540   SDValue To[] = { Load, Chain };
12541   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
12542   // Since we're explicitly calling ReplaceAllUses, add the new node to the
12543   // worklist explicitly as well.
12544   AddToWorklist(Load.getNode());
12545   AddUsersToWorklist(Load.getNode()); // Add users too
12546   // Make sure to revisit this node to clean it up; it will usually be dead.
12547   AddToWorklist(EVE);
12548   ++OpsNarrowed;
12549   return SDValue(EVE, 0);
12550 }
12551 
12552 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
12553   // (vextract (scalar_to_vector val, 0) -> val
12554   SDValue InVec = N->getOperand(0);
12555   EVT VT = InVec.getValueType();
12556   EVT NVT = N->getValueType(0);
12557 
12558   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
12559     // Check if the result type doesn't match the inserted element type. A
12560     // SCALAR_TO_VECTOR may truncate the inserted element and the
12561     // EXTRACT_VECTOR_ELT may widen the extracted vector.
12562     SDValue InOp = InVec.getOperand(0);
12563     if (InOp.getValueType() != NVT) {
12564       assert(InOp.getValueType().isInteger() && NVT.isInteger());
12565       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
12566     }
12567     return InOp;
12568   }
12569 
12570   SDValue EltNo = N->getOperand(1);
12571   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
12572 
12573   // extract_vector_elt (build_vector x, y), 1 -> y
12574   if (ConstEltNo &&
12575       InVec.getOpcode() == ISD::BUILD_VECTOR &&
12576       TLI.isTypeLegal(VT) &&
12577       (InVec.hasOneUse() ||
12578        TLI.aggressivelyPreferBuildVectorSources(VT))) {
12579     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
12580     EVT InEltVT = Elt.getValueType();
12581 
12582     // Sometimes build_vector's scalar input types do not match result type.
12583     if (NVT == InEltVT)
12584       return Elt;
12585 
12586     // TODO: It may be useful to truncate if free if the build_vector implicitly
12587     // converts.
12588   }
12589 
12590   // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
12591   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
12592       ConstEltNo->isNullValue() && VT.isInteger()) {
12593     SDValue BCSrc = InVec.getOperand(0);
12594     if (BCSrc.getValueType().isScalarInteger())
12595       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
12596   }
12597 
12598   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
12599   //
12600   // This only really matters if the index is non-constant since other combines
12601   // on the constant elements already work.
12602   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
12603       EltNo == InVec.getOperand(2)) {
12604     SDValue Elt = InVec.getOperand(1);
12605     return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
12606   }
12607 
12608   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
12609   // We only perform this optimization before the op legalization phase because
12610   // we may introduce new vector instructions which are not backed by TD
12611   // patterns. For example on AVX, extracting elements from a wide vector
12612   // without using extract_subvector. However, if we can find an underlying
12613   // scalar value, then we can always use that.
12614   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
12615     int NumElem = VT.getVectorNumElements();
12616     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
12617     // Find the new index to extract from.
12618     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
12619 
12620     // Extracting an undef index is undef.
12621     if (OrigElt == -1)
12622       return DAG.getUNDEF(NVT);
12623 
12624     // Select the right vector half to extract from.
12625     SDValue SVInVec;
12626     if (OrigElt < NumElem) {
12627       SVInVec = InVec->getOperand(0);
12628     } else {
12629       SVInVec = InVec->getOperand(1);
12630       OrigElt -= NumElem;
12631     }
12632 
12633     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
12634       SDValue InOp = SVInVec.getOperand(OrigElt);
12635       if (InOp.getValueType() != NVT) {
12636         assert(InOp.getValueType().isInteger() && NVT.isInteger());
12637         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
12638       }
12639 
12640       return InOp;
12641     }
12642 
12643     // FIXME: We should handle recursing on other vector shuffles and
12644     // scalar_to_vector here as well.
12645 
12646     if (!LegalOperations) {
12647       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
12648       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
12649                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
12650     }
12651   }
12652 
12653   bool BCNumEltsChanged = false;
12654   EVT ExtVT = VT.getVectorElementType();
12655   EVT LVT = ExtVT;
12656 
12657   // If the result of load has to be truncated, then it's not necessarily
12658   // profitable.
12659   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
12660     return SDValue();
12661 
12662   if (InVec.getOpcode() == ISD::BITCAST) {
12663     // Don't duplicate a load with other uses.
12664     if (!InVec.hasOneUse())
12665       return SDValue();
12666 
12667     EVT BCVT = InVec.getOperand(0).getValueType();
12668     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
12669       return SDValue();
12670     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
12671       BCNumEltsChanged = true;
12672     InVec = InVec.getOperand(0);
12673     ExtVT = BCVT.getVectorElementType();
12674   }
12675 
12676   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
12677   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
12678       ISD::isNormalLoad(InVec.getNode()) &&
12679       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
12680     SDValue Index = N->getOperand(1);
12681     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
12682       if (!OrigLoad->isVolatile()) {
12683         return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
12684                                                              OrigLoad);
12685       }
12686     }
12687   }
12688 
12689   // Perform only after legalization to ensure build_vector / vector_shuffle
12690   // optimizations have already been done.
12691   if (!LegalOperations) return SDValue();
12692 
12693   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
12694   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
12695   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
12696 
12697   if (ConstEltNo) {
12698     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12699 
12700     LoadSDNode *LN0 = nullptr;
12701     const ShuffleVectorSDNode *SVN = nullptr;
12702     if (ISD::isNormalLoad(InVec.getNode())) {
12703       LN0 = cast<LoadSDNode>(InVec);
12704     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
12705                InVec.getOperand(0).getValueType() == ExtVT &&
12706                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
12707       // Don't duplicate a load with other uses.
12708       if (!InVec.hasOneUse())
12709         return SDValue();
12710 
12711       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
12712     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
12713       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
12714       // =>
12715       // (load $addr+1*size)
12716 
12717       // Don't duplicate a load with other uses.
12718       if (!InVec.hasOneUse())
12719         return SDValue();
12720 
12721       // If the bit convert changed the number of elements, it is unsafe
12722       // to examine the mask.
12723       if (BCNumEltsChanged)
12724         return SDValue();
12725 
12726       // Select the input vector, guarding against out of range extract vector.
12727       unsigned NumElems = VT.getVectorNumElements();
12728       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
12729       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
12730 
12731       if (InVec.getOpcode() == ISD::BITCAST) {
12732         // Don't duplicate a load with other uses.
12733         if (!InVec.hasOneUse())
12734           return SDValue();
12735 
12736         InVec = InVec.getOperand(0);
12737       }
12738       if (ISD::isNormalLoad(InVec.getNode())) {
12739         LN0 = cast<LoadSDNode>(InVec);
12740         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
12741         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
12742       }
12743     }
12744 
12745     // Make sure we found a non-volatile load and the extractelement is
12746     // the only use.
12747     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
12748       return SDValue();
12749 
12750     // If Idx was -1 above, Elt is going to be -1, so just return undef.
12751     if (Elt == -1)
12752       return DAG.getUNDEF(LVT);
12753 
12754     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
12755   }
12756 
12757   return SDValue();
12758 }
12759 
12760 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
12761 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
12762   // We perform this optimization post type-legalization because
12763   // the type-legalizer often scalarizes integer-promoted vectors.
12764   // Performing this optimization before may create bit-casts which
12765   // will be type-legalized to complex code sequences.
12766   // We perform this optimization only before the operation legalizer because we
12767   // may introduce illegal operations.
12768   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
12769     return SDValue();
12770 
12771   unsigned NumInScalars = N->getNumOperands();
12772   SDLoc DL(N);
12773   EVT VT = N->getValueType(0);
12774 
12775   // Check to see if this is a BUILD_VECTOR of a bunch of values
12776   // which come from any_extend or zero_extend nodes. If so, we can create
12777   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
12778   // optimizations. We do not handle sign-extend because we can't fill the sign
12779   // using shuffles.
12780   EVT SourceType = MVT::Other;
12781   bool AllAnyExt = true;
12782 
12783   for (unsigned i = 0; i != NumInScalars; ++i) {
12784     SDValue In = N->getOperand(i);
12785     // Ignore undef inputs.
12786     if (In.isUndef()) continue;
12787 
12788     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
12789     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
12790 
12791     // Abort if the element is not an extension.
12792     if (!ZeroExt && !AnyExt) {
12793       SourceType = MVT::Other;
12794       break;
12795     }
12796 
12797     // The input is a ZeroExt or AnyExt. Check the original type.
12798     EVT InTy = In.getOperand(0).getValueType();
12799 
12800     // Check that all of the widened source types are the same.
12801     if (SourceType == MVT::Other)
12802       // First time.
12803       SourceType = InTy;
12804     else if (InTy != SourceType) {
12805       // Multiple income types. Abort.
12806       SourceType = MVT::Other;
12807       break;
12808     }
12809 
12810     // Check if all of the extends are ANY_EXTENDs.
12811     AllAnyExt &= AnyExt;
12812   }
12813 
12814   // In order to have valid types, all of the inputs must be extended from the
12815   // same source type and all of the inputs must be any or zero extend.
12816   // Scalar sizes must be a power of two.
12817   EVT OutScalarTy = VT.getScalarType();
12818   bool ValidTypes = SourceType != MVT::Other &&
12819                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
12820                  isPowerOf2_32(SourceType.getSizeInBits());
12821 
12822   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
12823   // turn into a single shuffle instruction.
12824   if (!ValidTypes)
12825     return SDValue();
12826 
12827   bool isLE = DAG.getDataLayout().isLittleEndian();
12828   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
12829   assert(ElemRatio > 1 && "Invalid element size ratio");
12830   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
12831                                DAG.getConstant(0, DL, SourceType);
12832 
12833   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
12834   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
12835 
12836   // Populate the new build_vector
12837   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
12838     SDValue Cast = N->getOperand(i);
12839     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
12840             Cast.getOpcode() == ISD::ZERO_EXTEND ||
12841             Cast.isUndef()) && "Invalid cast opcode");
12842     SDValue In;
12843     if (Cast.isUndef())
12844       In = DAG.getUNDEF(SourceType);
12845     else
12846       In = Cast->getOperand(0);
12847     unsigned Index = isLE ? (i * ElemRatio) :
12848                             (i * ElemRatio + (ElemRatio - 1));
12849 
12850     assert(Index < Ops.size() && "Invalid index");
12851     Ops[Index] = In;
12852   }
12853 
12854   // The type of the new BUILD_VECTOR node.
12855   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
12856   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
12857          "Invalid vector size");
12858   // Check if the new vector type is legal.
12859   if (!isTypeLegal(VecVT)) return SDValue();
12860 
12861   // Make the new BUILD_VECTOR.
12862   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
12863 
12864   // The new BUILD_VECTOR node has the potential to be further optimized.
12865   AddToWorklist(BV.getNode());
12866   // Bitcast to the desired type.
12867   return DAG.getBitcast(VT, BV);
12868 }
12869 
12870 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
12871   EVT VT = N->getValueType(0);
12872 
12873   unsigned NumInScalars = N->getNumOperands();
12874   SDLoc DL(N);
12875 
12876   EVT SrcVT = MVT::Other;
12877   unsigned Opcode = ISD::DELETED_NODE;
12878   unsigned NumDefs = 0;
12879 
12880   for (unsigned i = 0; i != NumInScalars; ++i) {
12881     SDValue In = N->getOperand(i);
12882     unsigned Opc = In.getOpcode();
12883 
12884     if (Opc == ISD::UNDEF)
12885       continue;
12886 
12887     // If all scalar values are floats and converted from integers.
12888     if (Opcode == ISD::DELETED_NODE &&
12889         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
12890       Opcode = Opc;
12891     }
12892 
12893     if (Opc != Opcode)
12894       return SDValue();
12895 
12896     EVT InVT = In.getOperand(0).getValueType();
12897 
12898     // If all scalar values are typed differently, bail out. It's chosen to
12899     // simplify BUILD_VECTOR of integer types.
12900     if (SrcVT == MVT::Other)
12901       SrcVT = InVT;
12902     if (SrcVT != InVT)
12903       return SDValue();
12904     NumDefs++;
12905   }
12906 
12907   // If the vector has just one element defined, it's not worth to fold it into
12908   // a vectorized one.
12909   if (NumDefs < 2)
12910     return SDValue();
12911 
12912   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
12913          && "Should only handle conversion from integer to float.");
12914   assert(SrcVT != MVT::Other && "Cannot determine source type!");
12915 
12916   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
12917 
12918   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
12919     return SDValue();
12920 
12921   // Just because the floating-point vector type is legal does not necessarily
12922   // mean that the corresponding integer vector type is.
12923   if (!isTypeLegal(NVT))
12924     return SDValue();
12925 
12926   SmallVector<SDValue, 8> Opnds;
12927   for (unsigned i = 0; i != NumInScalars; ++i) {
12928     SDValue In = N->getOperand(i);
12929 
12930     if (In.isUndef())
12931       Opnds.push_back(DAG.getUNDEF(SrcVT));
12932     else
12933       Opnds.push_back(In.getOperand(0));
12934   }
12935   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
12936   AddToWorklist(BV.getNode());
12937 
12938   return DAG.getNode(Opcode, DL, VT, BV);
12939 }
12940 
12941 SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
12942                                            ArrayRef<int> VectorMask,
12943                                            SDValue VecIn1, SDValue VecIn2,
12944                                            unsigned LeftIdx) {
12945   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
12946   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
12947 
12948   EVT VT = N->getValueType(0);
12949   EVT InVT1 = VecIn1.getValueType();
12950   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
12951 
12952   unsigned Vec2Offset = InVT1.getVectorNumElements();
12953   unsigned NumElems = VT.getVectorNumElements();
12954   unsigned ShuffleNumElems = NumElems;
12955 
12956   // We can't generate a shuffle node with mismatched input and output types.
12957   // Try to make the types match the type of the output.
12958   if (InVT1 != VT || InVT2 != VT) {
12959     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
12960       // If the output vector length is a multiple of both input lengths,
12961       // we can concatenate them and pad the rest with undefs.
12962       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
12963       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
12964       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
12965       ConcatOps[0] = VecIn1;
12966       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
12967       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
12968       VecIn2 = SDValue();
12969     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
12970       if (!TLI.isExtractSubvectorCheap(VT, NumElems))
12971         return SDValue();
12972 
12973       if (!VecIn2.getNode()) {
12974         // If we only have one input vector, and it's twice the size of the
12975         // output, split it in two.
12976         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
12977                              DAG.getConstant(NumElems, DL, IdxTy));
12978         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
12979         // Since we now have shorter input vectors, adjust the offset of the
12980         // second vector's start.
12981         Vec2Offset = NumElems;
12982       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
12983         // VecIn1 is wider than the output, and we have another, possibly
12984         // smaller input. Pad the smaller input with undefs, shuffle at the
12985         // input vector width, and extract the output.
12986         // The shuffle type is different than VT, so check legality again.
12987         if (LegalOperations &&
12988             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
12989           return SDValue();
12990 
12991         if (InVT1 != InVT2)
12992           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
12993                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
12994         ShuffleNumElems = NumElems * 2;
12995       } else {
12996         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
12997         // than VecIn1. We can't handle this for now - this case will disappear
12998         // when we start sorting the vectors by type.
12999         return SDValue();
13000       }
13001     } else {
13002       // TODO: Support cases where the length mismatch isn't exactly by a
13003       // factor of 2.
13004       // TODO: Move this check upwards, so that if we have bad type
13005       // mismatches, we don't create any DAG nodes.
13006       return SDValue();
13007     }
13008   }
13009 
13010   // Initialize mask to undef.
13011   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
13012 
13013   // Only need to run up to the number of elements actually used, not the
13014   // total number of elements in the shuffle - if we are shuffling a wider
13015   // vector, the high lanes should be set to undef.
13016   for (unsigned i = 0; i != NumElems; ++i) {
13017     if (VectorMask[i] <= 0)
13018       continue;
13019 
13020     SDValue Extract = N->getOperand(i);
13021     unsigned ExtIndex =
13022         cast<ConstantSDNode>(Extract.getOperand(1))->getZExtValue();
13023 
13024     if (VectorMask[i] == (int)LeftIdx) {
13025       Mask[i] = ExtIndex;
13026     } else if (VectorMask[i] == (int)LeftIdx + 1) {
13027       Mask[i] = Vec2Offset + ExtIndex;
13028     }
13029   }
13030 
13031   // The type the input vectors may have changed above.
13032   InVT1 = VecIn1.getValueType();
13033 
13034   // If we already have a VecIn2, it should have the same type as VecIn1.
13035   // If we don't, get an undef/zero vector of the appropriate type.
13036   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
13037   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
13038 
13039   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
13040   if (ShuffleNumElems > NumElems)
13041     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
13042 
13043   return Shuffle;
13044 }
13045 
13046 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
13047 // operations. If the types of the vectors we're extracting from allow it,
13048 // turn this into a vector_shuffle node.
13049 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
13050   SDLoc DL(N);
13051   EVT VT = N->getValueType(0);
13052 
13053   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
13054   if (!isTypeLegal(VT))
13055     return SDValue();
13056 
13057   // May only combine to shuffle after legalize if shuffle is legal.
13058   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
13059     return SDValue();
13060 
13061   bool UsesZeroVector = false;
13062   unsigned NumElems = N->getNumOperands();
13063 
13064   // Record, for each element of the newly built vector, which input vector
13065   // that element comes from. -1 stands for undef, 0 for the zero vector,
13066   // and positive values for the input vectors.
13067   // VectorMask maps each element to its vector number, and VecIn maps vector
13068   // numbers to their initial SDValues.
13069 
13070   SmallVector<int, 8> VectorMask(NumElems, -1);
13071   SmallVector<SDValue, 8> VecIn;
13072   VecIn.push_back(SDValue());
13073 
13074   for (unsigned i = 0; i != NumElems; ++i) {
13075     SDValue Op = N->getOperand(i);
13076 
13077     if (Op.isUndef())
13078       continue;
13079 
13080     // See if we can use a blend with a zero vector.
13081     // TODO: Should we generalize this to a blend with an arbitrary constant
13082     // vector?
13083     if (isNullConstant(Op) || isNullFPConstant(Op)) {
13084       UsesZeroVector = true;
13085       VectorMask[i] = 0;
13086       continue;
13087     }
13088 
13089     // Not an undef or zero. If the input is something other than an
13090     // EXTRACT_VECTOR_ELT with a constant index, bail out.
13091     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13092         !isa<ConstantSDNode>(Op.getOperand(1)))
13093       return SDValue();
13094 
13095     SDValue ExtractedFromVec = Op.getOperand(0);
13096 
13097     // All inputs must have the same element type as the output.
13098     if (VT.getVectorElementType() !=
13099         ExtractedFromVec.getValueType().getVectorElementType())
13100       return SDValue();
13101 
13102     // Have we seen this input vector before?
13103     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
13104     // a map back from SDValues to numbers isn't worth it.
13105     unsigned Idx = std::distance(
13106         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
13107     if (Idx == VecIn.size())
13108       VecIn.push_back(ExtractedFromVec);
13109 
13110     VectorMask[i] = Idx;
13111   }
13112 
13113   // If we didn't find at least one input vector, bail out.
13114   if (VecIn.size() < 2)
13115     return SDValue();
13116 
13117   // TODO: We want to sort the vectors by descending length, so that adjacent
13118   // pairs have similar length, and the longer vector is always first in the
13119   // pair.
13120 
13121   // TODO: Should this fire if some of the input vectors has illegal type (like
13122   // it does now), or should we let legalization run its course first?
13123 
13124   // Shuffle phase:
13125   // Take pairs of vectors, and shuffle them so that the result has elements
13126   // from these vectors in the correct places.
13127   // For example, given:
13128   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
13129   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
13130   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
13131   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
13132   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
13133   // We will generate:
13134   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
13135   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
13136   SmallVector<SDValue, 4> Shuffles;
13137   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
13138     unsigned LeftIdx = 2 * In + 1;
13139     SDValue VecLeft = VecIn[LeftIdx];
13140     SDValue VecRight =
13141         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
13142 
13143     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
13144                                                 VecRight, LeftIdx))
13145       Shuffles.push_back(Shuffle);
13146     else
13147       return SDValue();
13148   }
13149 
13150   // If we need the zero vector as an "ingredient" in the blend tree, add it
13151   // to the list of shuffles.
13152   if (UsesZeroVector)
13153     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
13154                                       : DAG.getConstantFP(0.0, DL, VT));
13155 
13156   // If we only have one shuffle, we're done.
13157   if (Shuffles.size() == 1)
13158     return Shuffles[0];
13159 
13160   // Update the vector mask to point to the post-shuffle vectors.
13161   for (int &Vec : VectorMask)
13162     if (Vec == 0)
13163       Vec = Shuffles.size() - 1;
13164     else
13165       Vec = (Vec - 1) / 2;
13166 
13167   // More than one shuffle. Generate a binary tree of blends, e.g. if from
13168   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
13169   // generate:
13170   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
13171   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
13172   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
13173   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
13174   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
13175   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
13176   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
13177 
13178   // Make sure the initial size of the shuffle list is even.
13179   if (Shuffles.size() % 2)
13180     Shuffles.push_back(DAG.getUNDEF(VT));
13181 
13182   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
13183     if (CurSize % 2) {
13184       Shuffles[CurSize] = DAG.getUNDEF(VT);
13185       CurSize++;
13186     }
13187     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
13188       int Left = 2 * In;
13189       int Right = 2 * In + 1;
13190       SmallVector<int, 8> Mask(NumElems, -1);
13191       for (unsigned i = 0; i != NumElems; ++i) {
13192         if (VectorMask[i] == Left) {
13193           Mask[i] = i;
13194           VectorMask[i] = In;
13195         } else if (VectorMask[i] == Right) {
13196           Mask[i] = i + NumElems;
13197           VectorMask[i] = In;
13198         }
13199       }
13200 
13201       Shuffles[In] =
13202           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
13203     }
13204   }
13205 
13206   return Shuffles[0];
13207 }
13208 
13209 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
13210   EVT VT = N->getValueType(0);
13211 
13212   // A vector built entirely of undefs is undef.
13213   if (ISD::allOperandsUndef(N))
13214     return DAG.getUNDEF(VT);
13215 
13216   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
13217     return V;
13218 
13219   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
13220     return V;
13221 
13222   if (SDValue V = reduceBuildVecToShuffle(N))
13223     return V;
13224 
13225   return SDValue();
13226 }
13227 
13228 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
13229   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13230   EVT OpVT = N->getOperand(0).getValueType();
13231 
13232   // If the operands are legal vectors, leave them alone.
13233   if (TLI.isTypeLegal(OpVT))
13234     return SDValue();
13235 
13236   SDLoc DL(N);
13237   EVT VT = N->getValueType(0);
13238   SmallVector<SDValue, 8> Ops;
13239 
13240   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
13241   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
13242 
13243   // Keep track of what we encounter.
13244   bool AnyInteger = false;
13245   bool AnyFP = false;
13246   for (const SDValue &Op : N->ops()) {
13247     if (ISD::BITCAST == Op.getOpcode() &&
13248         !Op.getOperand(0).getValueType().isVector())
13249       Ops.push_back(Op.getOperand(0));
13250     else if (ISD::UNDEF == Op.getOpcode())
13251       Ops.push_back(ScalarUndef);
13252     else
13253       return SDValue();
13254 
13255     // Note whether we encounter an integer or floating point scalar.
13256     // If it's neither, bail out, it could be something weird like x86mmx.
13257     EVT LastOpVT = Ops.back().getValueType();
13258     if (LastOpVT.isFloatingPoint())
13259       AnyFP = true;
13260     else if (LastOpVT.isInteger())
13261       AnyInteger = true;
13262     else
13263       return SDValue();
13264   }
13265 
13266   // If any of the operands is a floating point scalar bitcast to a vector,
13267   // use floating point types throughout, and bitcast everything.
13268   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
13269   if (AnyFP) {
13270     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
13271     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
13272     if (AnyInteger) {
13273       for (SDValue &Op : Ops) {
13274         if (Op.getValueType() == SVT)
13275           continue;
13276         if (Op.isUndef())
13277           Op = ScalarUndef;
13278         else
13279           Op = DAG.getBitcast(SVT, Op);
13280       }
13281     }
13282   }
13283 
13284   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
13285                                VT.getSizeInBits() / SVT.getSizeInBits());
13286   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
13287 }
13288 
13289 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
13290 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
13291 // most two distinct vectors the same size as the result, attempt to turn this
13292 // into a legal shuffle.
13293 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
13294   EVT VT = N->getValueType(0);
13295   EVT OpVT = N->getOperand(0).getValueType();
13296   int NumElts = VT.getVectorNumElements();
13297   int NumOpElts = OpVT.getVectorNumElements();
13298 
13299   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
13300   SmallVector<int, 8> Mask;
13301 
13302   for (SDValue Op : N->ops()) {
13303     // Peek through any bitcast.
13304     while (Op.getOpcode() == ISD::BITCAST)
13305       Op = Op.getOperand(0);
13306 
13307     // UNDEF nodes convert to UNDEF shuffle mask values.
13308     if (Op.isUndef()) {
13309       Mask.append((unsigned)NumOpElts, -1);
13310       continue;
13311     }
13312 
13313     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13314       return SDValue();
13315 
13316     // What vector are we extracting the subvector from and at what index?
13317     SDValue ExtVec = Op.getOperand(0);
13318 
13319     // We want the EVT of the original extraction to correctly scale the
13320     // extraction index.
13321     EVT ExtVT = ExtVec.getValueType();
13322 
13323     // Peek through any bitcast.
13324     while (ExtVec.getOpcode() == ISD::BITCAST)
13325       ExtVec = ExtVec.getOperand(0);
13326 
13327     // UNDEF nodes convert to UNDEF shuffle mask values.
13328     if (ExtVec.isUndef()) {
13329       Mask.append((unsigned)NumOpElts, -1);
13330       continue;
13331     }
13332 
13333     if (!isa<ConstantSDNode>(Op.getOperand(1)))
13334       return SDValue();
13335     int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
13336 
13337     // Ensure that we are extracting a subvector from a vector the same
13338     // size as the result.
13339     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
13340       return SDValue();
13341 
13342     // Scale the subvector index to account for any bitcast.
13343     int NumExtElts = ExtVT.getVectorNumElements();
13344     if (0 == (NumExtElts % NumElts))
13345       ExtIdx /= (NumExtElts / NumElts);
13346     else if (0 == (NumElts % NumExtElts))
13347       ExtIdx *= (NumElts / NumExtElts);
13348     else
13349       return SDValue();
13350 
13351     // At most we can reference 2 inputs in the final shuffle.
13352     if (SV0.isUndef() || SV0 == ExtVec) {
13353       SV0 = ExtVec;
13354       for (int i = 0; i != NumOpElts; ++i)
13355         Mask.push_back(i + ExtIdx);
13356     } else if (SV1.isUndef() || SV1 == ExtVec) {
13357       SV1 = ExtVec;
13358       for (int i = 0; i != NumOpElts; ++i)
13359         Mask.push_back(i + ExtIdx + NumElts);
13360     } else {
13361       return SDValue();
13362     }
13363   }
13364 
13365   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
13366     return SDValue();
13367 
13368   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
13369                               DAG.getBitcast(VT, SV1), Mask);
13370 }
13371 
13372 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
13373   // If we only have one input vector, we don't need to do any concatenation.
13374   if (N->getNumOperands() == 1)
13375     return N->getOperand(0);
13376 
13377   // Check if all of the operands are undefs.
13378   EVT VT = N->getValueType(0);
13379   if (ISD::allOperandsUndef(N))
13380     return DAG.getUNDEF(VT);
13381 
13382   // Optimize concat_vectors where all but the first of the vectors are undef.
13383   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
13384         return Op.isUndef();
13385       })) {
13386     SDValue In = N->getOperand(0);
13387     assert(In.getValueType().isVector() && "Must concat vectors");
13388 
13389     // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
13390     if (In->getOpcode() == ISD::BITCAST &&
13391         !In->getOperand(0)->getValueType(0).isVector()) {
13392       SDValue Scalar = In->getOperand(0);
13393 
13394       // If the bitcast type isn't legal, it might be a trunc of a legal type;
13395       // look through the trunc so we can still do the transform:
13396       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
13397       if (Scalar->getOpcode() == ISD::TRUNCATE &&
13398           !TLI.isTypeLegal(Scalar.getValueType()) &&
13399           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
13400         Scalar = Scalar->getOperand(0);
13401 
13402       EVT SclTy = Scalar->getValueType(0);
13403 
13404       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
13405         return SDValue();
13406 
13407       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
13408                                  VT.getSizeInBits() / SclTy.getSizeInBits());
13409       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
13410         return SDValue();
13411 
13412       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
13413       return DAG.getBitcast(VT, Res);
13414     }
13415   }
13416 
13417   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
13418   // We have already tested above for an UNDEF only concatenation.
13419   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
13420   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
13421   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
13422     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
13423   };
13424   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
13425     SmallVector<SDValue, 8> Opnds;
13426     EVT SVT = VT.getScalarType();
13427 
13428     EVT MinVT = SVT;
13429     if (!SVT.isFloatingPoint()) {
13430       // If BUILD_VECTOR are from built from integer, they may have different
13431       // operand types. Get the smallest type and truncate all operands to it.
13432       bool FoundMinVT = false;
13433       for (const SDValue &Op : N->ops())
13434         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
13435           EVT OpSVT = Op.getOperand(0)->getValueType(0);
13436           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
13437           FoundMinVT = true;
13438         }
13439       assert(FoundMinVT && "Concat vector type mismatch");
13440     }
13441 
13442     for (const SDValue &Op : N->ops()) {
13443       EVT OpVT = Op.getValueType();
13444       unsigned NumElts = OpVT.getVectorNumElements();
13445 
13446       if (ISD::UNDEF == Op.getOpcode())
13447         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
13448 
13449       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
13450         if (SVT.isFloatingPoint()) {
13451           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
13452           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
13453         } else {
13454           for (unsigned i = 0; i != NumElts; ++i)
13455             Opnds.push_back(
13456                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
13457         }
13458       }
13459     }
13460 
13461     assert(VT.getVectorNumElements() == Opnds.size() &&
13462            "Concat vector type mismatch");
13463     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
13464   }
13465 
13466   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
13467   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
13468     return V;
13469 
13470   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
13471   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
13472     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
13473       return V;
13474 
13475   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
13476   // nodes often generate nop CONCAT_VECTOR nodes.
13477   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
13478   // place the incoming vectors at the exact same location.
13479   SDValue SingleSource = SDValue();
13480   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
13481 
13482   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
13483     SDValue Op = N->getOperand(i);
13484 
13485     if (Op.isUndef())
13486       continue;
13487 
13488     // Check if this is the identity extract:
13489     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13490       return SDValue();
13491 
13492     // Find the single incoming vector for the extract_subvector.
13493     if (SingleSource.getNode()) {
13494       if (Op.getOperand(0) != SingleSource)
13495         return SDValue();
13496     } else {
13497       SingleSource = Op.getOperand(0);
13498 
13499       // Check the source type is the same as the type of the result.
13500       // If not, this concat may extend the vector, so we can not
13501       // optimize it away.
13502       if (SingleSource.getValueType() != N->getValueType(0))
13503         return SDValue();
13504     }
13505 
13506     unsigned IdentityIndex = i * PartNumElem;
13507     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
13508     // The extract index must be constant.
13509     if (!CS)
13510       return SDValue();
13511 
13512     // Check that we are reading from the identity index.
13513     if (CS->getZExtValue() != IdentityIndex)
13514       return SDValue();
13515   }
13516 
13517   if (SingleSource.getNode())
13518     return SingleSource;
13519 
13520   return SDValue();
13521 }
13522 
13523 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
13524   EVT NVT = N->getValueType(0);
13525   SDValue V = N->getOperand(0);
13526 
13527   if (V->getOpcode() == ISD::CONCAT_VECTORS) {
13528     // Combine:
13529     //    (extract_subvec (concat V1, V2, ...), i)
13530     // Into:
13531     //    Vi if possible
13532     // Only operand 0 is checked as 'concat' assumes all inputs of the same
13533     // type.
13534     if (V->getOperand(0).getValueType() != NVT)
13535       return SDValue();
13536     unsigned Idx = N->getConstantOperandVal(1);
13537     unsigned NumElems = NVT.getVectorNumElements();
13538     assert((Idx % NumElems) == 0 &&
13539            "IDX in concat is not a multiple of the result vector length.");
13540     return V->getOperand(Idx / NumElems);
13541   }
13542 
13543   // Skip bitcasting
13544   if (V->getOpcode() == ISD::BITCAST)
13545     V = V.getOperand(0);
13546 
13547   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
13548     // Handle only simple case where vector being inserted and vector
13549     // being extracted are of same type, and are half size of larger vectors.
13550     EVT BigVT = V->getOperand(0).getValueType();
13551     EVT SmallVT = V->getOperand(1).getValueType();
13552     if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
13553       return SDValue();
13554 
13555     // Only handle cases where both indexes are constants with the same type.
13556     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
13557     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
13558 
13559     if (InsIdx && ExtIdx &&
13560         InsIdx->getValueType(0).getSizeInBits() <= 64 &&
13561         ExtIdx->getValueType(0).getSizeInBits() <= 64) {
13562       // Combine:
13563       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
13564       // Into:
13565       //    indices are equal or bit offsets are equal => V1
13566       //    otherwise => (extract_subvec V1, ExtIdx)
13567       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
13568           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
13569         return DAG.getBitcast(NVT, V->getOperand(1));
13570       return DAG.getNode(
13571           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
13572           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
13573           N->getOperand(1));
13574     }
13575   }
13576 
13577   return SDValue();
13578 }
13579 
13580 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
13581                                                  SDValue V, SelectionDAG &DAG) {
13582   SDLoc DL(V);
13583   EVT VT = V.getValueType();
13584 
13585   switch (V.getOpcode()) {
13586   default:
13587     return V;
13588 
13589   case ISD::CONCAT_VECTORS: {
13590     EVT OpVT = V->getOperand(0).getValueType();
13591     int OpSize = OpVT.getVectorNumElements();
13592     SmallBitVector OpUsedElements(OpSize, false);
13593     bool FoundSimplification = false;
13594     SmallVector<SDValue, 4> NewOps;
13595     NewOps.reserve(V->getNumOperands());
13596     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
13597       SDValue Op = V->getOperand(i);
13598       bool OpUsed = false;
13599       for (int j = 0; j < OpSize; ++j)
13600         if (UsedElements[i * OpSize + j]) {
13601           OpUsedElements[j] = true;
13602           OpUsed = true;
13603         }
13604       NewOps.push_back(
13605           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
13606                  : DAG.getUNDEF(OpVT));
13607       FoundSimplification |= Op == NewOps.back();
13608       OpUsedElements.reset();
13609     }
13610     if (FoundSimplification)
13611       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
13612     return V;
13613   }
13614 
13615   case ISD::INSERT_SUBVECTOR: {
13616     SDValue BaseV = V->getOperand(0);
13617     SDValue SubV = V->getOperand(1);
13618     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
13619     if (!IdxN)
13620       return V;
13621 
13622     int SubSize = SubV.getValueType().getVectorNumElements();
13623     int Idx = IdxN->getZExtValue();
13624     bool SubVectorUsed = false;
13625     SmallBitVector SubUsedElements(SubSize, false);
13626     for (int i = 0; i < SubSize; ++i)
13627       if (UsedElements[i + Idx]) {
13628         SubVectorUsed = true;
13629         SubUsedElements[i] = true;
13630         UsedElements[i + Idx] = false;
13631       }
13632 
13633     // Now recurse on both the base and sub vectors.
13634     SDValue SimplifiedSubV =
13635         SubVectorUsed
13636             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
13637             : DAG.getUNDEF(SubV.getValueType());
13638     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
13639     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
13640       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
13641                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
13642     return V;
13643   }
13644   }
13645 }
13646 
13647 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
13648                                        SDValue N1, SelectionDAG &DAG) {
13649   EVT VT = SVN->getValueType(0);
13650   int NumElts = VT.getVectorNumElements();
13651   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
13652   for (int M : SVN->getMask())
13653     if (M >= 0 && M < NumElts)
13654       N0UsedElements[M] = true;
13655     else if (M >= NumElts)
13656       N1UsedElements[M - NumElts] = true;
13657 
13658   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
13659   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
13660   if (S0 == N0 && S1 == N1)
13661     return SDValue();
13662 
13663   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
13664 }
13665 
13666 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
13667 // or turn a shuffle of a single concat into simpler shuffle then concat.
13668 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
13669   EVT VT = N->getValueType(0);
13670   unsigned NumElts = VT.getVectorNumElements();
13671 
13672   SDValue N0 = N->getOperand(0);
13673   SDValue N1 = N->getOperand(1);
13674   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
13675 
13676   SmallVector<SDValue, 4> Ops;
13677   EVT ConcatVT = N0.getOperand(0).getValueType();
13678   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
13679   unsigned NumConcats = NumElts / NumElemsPerConcat;
13680 
13681   // Special case: shuffle(concat(A,B)) can be more efficiently represented
13682   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
13683   // half vector elements.
13684   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
13685       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
13686                   SVN->getMask().end(), [](int i) { return i == -1; })) {
13687     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
13688                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
13689     N1 = DAG.getUNDEF(ConcatVT);
13690     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
13691   }
13692 
13693   // Look at every vector that's inserted. We're looking for exact
13694   // subvector-sized copies from a concatenated vector
13695   for (unsigned I = 0; I != NumConcats; ++I) {
13696     // Make sure we're dealing with a copy.
13697     unsigned Begin = I * NumElemsPerConcat;
13698     bool AllUndef = true, NoUndef = true;
13699     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
13700       if (SVN->getMaskElt(J) >= 0)
13701         AllUndef = false;
13702       else
13703         NoUndef = false;
13704     }
13705 
13706     if (NoUndef) {
13707       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
13708         return SDValue();
13709 
13710       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
13711         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
13712           return SDValue();
13713 
13714       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
13715       if (FirstElt < N0.getNumOperands())
13716         Ops.push_back(N0.getOperand(FirstElt));
13717       else
13718         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
13719 
13720     } else if (AllUndef) {
13721       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
13722     } else { // Mixed with general masks and undefs, can't do optimization.
13723       return SDValue();
13724     }
13725   }
13726 
13727   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
13728 }
13729 
13730 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
13731   EVT VT = N->getValueType(0);
13732   unsigned NumElts = VT.getVectorNumElements();
13733 
13734   SDValue N0 = N->getOperand(0);
13735   SDValue N1 = N->getOperand(1);
13736 
13737   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
13738 
13739   // Canonicalize shuffle undef, undef -> undef
13740   if (N0.isUndef() && N1.isUndef())
13741     return DAG.getUNDEF(VT);
13742 
13743   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
13744 
13745   // Canonicalize shuffle v, v -> v, undef
13746   if (N0 == N1) {
13747     SmallVector<int, 8> NewMask;
13748     for (unsigned i = 0; i != NumElts; ++i) {
13749       int Idx = SVN->getMaskElt(i);
13750       if (Idx >= (int)NumElts) Idx -= NumElts;
13751       NewMask.push_back(Idx);
13752     }
13753     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
13754   }
13755 
13756   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
13757   if (N0.isUndef())
13758     return DAG.getCommutedVectorShuffle(*SVN);
13759 
13760   // Remove references to rhs if it is undef
13761   if (N1.isUndef()) {
13762     bool Changed = false;
13763     SmallVector<int, 8> NewMask;
13764     for (unsigned i = 0; i != NumElts; ++i) {
13765       int Idx = SVN->getMaskElt(i);
13766       if (Idx >= (int)NumElts) {
13767         Idx = -1;
13768         Changed = true;
13769       }
13770       NewMask.push_back(Idx);
13771     }
13772     if (Changed)
13773       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
13774   }
13775 
13776   // If it is a splat, check if the argument vector is another splat or a
13777   // build_vector.
13778   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
13779     SDNode *V = N0.getNode();
13780 
13781     // If this is a bit convert that changes the element type of the vector but
13782     // not the number of vector elements, look through it.  Be careful not to
13783     // look though conversions that change things like v4f32 to v2f64.
13784     if (V->getOpcode() == ISD::BITCAST) {
13785       SDValue ConvInput = V->getOperand(0);
13786       if (ConvInput.getValueType().isVector() &&
13787           ConvInput.getValueType().getVectorNumElements() == NumElts)
13788         V = ConvInput.getNode();
13789     }
13790 
13791     if (V->getOpcode() == ISD::BUILD_VECTOR) {
13792       assert(V->getNumOperands() == NumElts &&
13793              "BUILD_VECTOR has wrong number of operands");
13794       SDValue Base;
13795       bool AllSame = true;
13796       for (unsigned i = 0; i != NumElts; ++i) {
13797         if (!V->getOperand(i).isUndef()) {
13798           Base = V->getOperand(i);
13799           break;
13800         }
13801       }
13802       // Splat of <u, u, u, u>, return <u, u, u, u>
13803       if (!Base.getNode())
13804         return N0;
13805       for (unsigned i = 0; i != NumElts; ++i) {
13806         if (V->getOperand(i) != Base) {
13807           AllSame = false;
13808           break;
13809         }
13810       }
13811       // Splat of <x, x, x, x>, return <x, x, x, x>
13812       if (AllSame)
13813         return N0;
13814 
13815       // Canonicalize any other splat as a build_vector.
13816       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
13817       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
13818       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
13819 
13820       // We may have jumped through bitcasts, so the type of the
13821       // BUILD_VECTOR may not match the type of the shuffle.
13822       if (V->getValueType(0) != VT)
13823         NewBV = DAG.getBitcast(VT, NewBV);
13824       return NewBV;
13825     }
13826   }
13827 
13828   // There are various patterns used to build up a vector from smaller vectors,
13829   // subvectors, or elements. Scan chains of these and replace unused insertions
13830   // or components with undef.
13831   if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
13832     return S;
13833 
13834   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
13835       Level < AfterLegalizeVectorOps &&
13836       (N1.isUndef() ||
13837       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
13838        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
13839     if (SDValue V = partitionShuffleOfConcats(N, DAG))
13840       return V;
13841   }
13842 
13843   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
13844   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
13845   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
13846     SmallVector<SDValue, 8> Ops;
13847     for (int M : SVN->getMask()) {
13848       SDValue Op = DAG.getUNDEF(VT.getScalarType());
13849       if (M >= 0) {
13850         int Idx = M % NumElts;
13851         SDValue &S = (M < (int)NumElts ? N0 : N1);
13852         if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
13853           Op = S.getOperand(Idx);
13854         } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
13855           if (Idx == 0)
13856             Op = S.getOperand(0);
13857         } else {
13858           // Operand can't be combined - bail out.
13859           break;
13860         }
13861       }
13862       Ops.push_back(Op);
13863     }
13864     if (Ops.size() == VT.getVectorNumElements()) {
13865       // BUILD_VECTOR requires all inputs to be of the same type, find the
13866       // maximum type and extend them all.
13867       EVT SVT = VT.getScalarType();
13868       if (SVT.isInteger())
13869         for (SDValue &Op : Ops)
13870           SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
13871       if (SVT != VT.getScalarType())
13872         for (SDValue &Op : Ops)
13873           Op = TLI.isZExtFree(Op.getValueType(), SVT)
13874                    ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
13875                    : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
13876       return DAG.getBuildVector(VT, SDLoc(N), Ops);
13877     }
13878   }
13879 
13880   // If this shuffle only has a single input that is a bitcasted shuffle,
13881   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
13882   // back to their original types.
13883   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
13884       N1.isUndef() && Level < AfterLegalizeVectorOps &&
13885       TLI.isTypeLegal(VT)) {
13886 
13887     // Peek through the bitcast only if there is one user.
13888     SDValue BC0 = N0;
13889     while (BC0.getOpcode() == ISD::BITCAST) {
13890       if (!BC0.hasOneUse())
13891         break;
13892       BC0 = BC0.getOperand(0);
13893     }
13894 
13895     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
13896       if (Scale == 1)
13897         return SmallVector<int, 8>(Mask.begin(), Mask.end());
13898 
13899       SmallVector<int, 8> NewMask;
13900       for (int M : Mask)
13901         for (int s = 0; s != Scale; ++s)
13902           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
13903       return NewMask;
13904     };
13905 
13906     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
13907       EVT SVT = VT.getScalarType();
13908       EVT InnerVT = BC0->getValueType(0);
13909       EVT InnerSVT = InnerVT.getScalarType();
13910 
13911       // Determine which shuffle works with the smaller scalar type.
13912       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
13913       EVT ScaleSVT = ScaleVT.getScalarType();
13914 
13915       if (TLI.isTypeLegal(ScaleVT) &&
13916           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
13917           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
13918 
13919         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
13920         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
13921 
13922         // Scale the shuffle masks to the smaller scalar type.
13923         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
13924         SmallVector<int, 8> InnerMask =
13925             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
13926         SmallVector<int, 8> OuterMask =
13927             ScaleShuffleMask(SVN->getMask(), OuterScale);
13928 
13929         // Merge the shuffle masks.
13930         SmallVector<int, 8> NewMask;
13931         for (int M : OuterMask)
13932           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
13933 
13934         // Test for shuffle mask legality over both commutations.
13935         SDValue SV0 = BC0->getOperand(0);
13936         SDValue SV1 = BC0->getOperand(1);
13937         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
13938         if (!LegalMask) {
13939           std::swap(SV0, SV1);
13940           ShuffleVectorSDNode::commuteMask(NewMask);
13941           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
13942         }
13943 
13944         if (LegalMask) {
13945           SV0 = DAG.getBitcast(ScaleVT, SV0);
13946           SV1 = DAG.getBitcast(ScaleVT, SV1);
13947           return DAG.getBitcast(
13948               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
13949         }
13950       }
13951     }
13952   }
13953 
13954   // Canonicalize shuffles according to rules:
13955   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
13956   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
13957   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
13958   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
13959       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
13960       TLI.isTypeLegal(VT)) {
13961     // The incoming shuffle must be of the same type as the result of the
13962     // current shuffle.
13963     assert(N1->getOperand(0).getValueType() == VT &&
13964            "Shuffle types don't match");
13965 
13966     SDValue SV0 = N1->getOperand(0);
13967     SDValue SV1 = N1->getOperand(1);
13968     bool HasSameOp0 = N0 == SV0;
13969     bool IsSV1Undef = SV1.isUndef();
13970     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
13971       // Commute the operands of this shuffle so that next rule
13972       // will trigger.
13973       return DAG.getCommutedVectorShuffle(*SVN);
13974   }
13975 
13976   // Try to fold according to rules:
13977   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
13978   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
13979   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
13980   // Don't try to fold shuffles with illegal type.
13981   // Only fold if this shuffle is the only user of the other shuffle.
13982   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
13983       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
13984     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
13985 
13986     // The incoming shuffle must be of the same type as the result of the
13987     // current shuffle.
13988     assert(OtherSV->getOperand(0).getValueType() == VT &&
13989            "Shuffle types don't match");
13990 
13991     SDValue SV0, SV1;
13992     SmallVector<int, 4> Mask;
13993     // Compute the combined shuffle mask for a shuffle with SV0 as the first
13994     // operand, and SV1 as the second operand.
13995     for (unsigned i = 0; i != NumElts; ++i) {
13996       int Idx = SVN->getMaskElt(i);
13997       if (Idx < 0) {
13998         // Propagate Undef.
13999         Mask.push_back(Idx);
14000         continue;
14001       }
14002 
14003       SDValue CurrentVec;
14004       if (Idx < (int)NumElts) {
14005         // This shuffle index refers to the inner shuffle N0. Lookup the inner
14006         // shuffle mask to identify which vector is actually referenced.
14007         Idx = OtherSV->getMaskElt(Idx);
14008         if (Idx < 0) {
14009           // Propagate Undef.
14010           Mask.push_back(Idx);
14011           continue;
14012         }
14013 
14014         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
14015                                            : OtherSV->getOperand(1);
14016       } else {
14017         // This shuffle index references an element within N1.
14018         CurrentVec = N1;
14019       }
14020 
14021       // Simple case where 'CurrentVec' is UNDEF.
14022       if (CurrentVec.isUndef()) {
14023         Mask.push_back(-1);
14024         continue;
14025       }
14026 
14027       // Canonicalize the shuffle index. We don't know yet if CurrentVec
14028       // will be the first or second operand of the combined shuffle.
14029       Idx = Idx % NumElts;
14030       if (!SV0.getNode() || SV0 == CurrentVec) {
14031         // Ok. CurrentVec is the left hand side.
14032         // Update the mask accordingly.
14033         SV0 = CurrentVec;
14034         Mask.push_back(Idx);
14035         continue;
14036       }
14037 
14038       // Bail out if we cannot convert the shuffle pair into a single shuffle.
14039       if (SV1.getNode() && SV1 != CurrentVec)
14040         return SDValue();
14041 
14042       // Ok. CurrentVec is the right hand side.
14043       // Update the mask accordingly.
14044       SV1 = CurrentVec;
14045       Mask.push_back(Idx + NumElts);
14046     }
14047 
14048     // Check if all indices in Mask are Undef. In case, propagate Undef.
14049     bool isUndefMask = true;
14050     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
14051       isUndefMask &= Mask[i] < 0;
14052 
14053     if (isUndefMask)
14054       return DAG.getUNDEF(VT);
14055 
14056     if (!SV0.getNode())
14057       SV0 = DAG.getUNDEF(VT);
14058     if (!SV1.getNode())
14059       SV1 = DAG.getUNDEF(VT);
14060 
14061     // Avoid introducing shuffles with illegal mask.
14062     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
14063       ShuffleVectorSDNode::commuteMask(Mask);
14064 
14065       if (!TLI.isShuffleMaskLegal(Mask, VT))
14066         return SDValue();
14067 
14068       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
14069       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
14070       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
14071       std::swap(SV0, SV1);
14072     }
14073 
14074     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
14075     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
14076     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
14077     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
14078   }
14079 
14080   return SDValue();
14081 }
14082 
14083 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
14084   SDValue InVal = N->getOperand(0);
14085   EVT VT = N->getValueType(0);
14086 
14087   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
14088   // with a VECTOR_SHUFFLE.
14089   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14090     SDValue InVec = InVal->getOperand(0);
14091     SDValue EltNo = InVal->getOperand(1);
14092 
14093     // FIXME: We could support implicit truncation if the shuffle can be
14094     // scaled to a smaller vector scalar type.
14095     ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
14096     if (C0 && VT == InVec.getValueType() &&
14097         VT.getScalarType() == InVal.getValueType()) {
14098       SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
14099       int Elt = C0->getZExtValue();
14100       NewMask[0] = Elt;
14101 
14102       if (TLI.isShuffleMaskLegal(NewMask, VT))
14103         return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
14104                                     NewMask);
14105     }
14106   }
14107 
14108   return SDValue();
14109 }
14110 
14111 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
14112   EVT VT = N->getValueType(0);
14113   SDValue N0 = N->getOperand(0);
14114   SDValue N1 = N->getOperand(1);
14115   SDValue N2 = N->getOperand(2);
14116 
14117   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
14118   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
14119   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
14120   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
14121       N0.getOperand(1).getValueType() == N1.getValueType() &&
14122       N0.getOperand(2) == N2)
14123     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
14124                        N1, N2);
14125 
14126   if (N0.getValueType() != N1.getValueType())
14127     return SDValue();
14128 
14129   // If the input vector is a concatenation, and the insert replaces
14130   // one of the halves, we can optimize into a single concat_vectors.
14131   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
14132       N2.getOpcode() == ISD::Constant) {
14133     APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
14134 
14135     // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
14136     // (concat_vectors Z, Y)
14137     if (InsIdx == 0)
14138       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1,
14139                          N0.getOperand(1));
14140 
14141     // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
14142     // (concat_vectors X, Z)
14143     if (InsIdx == VT.getVectorNumElements() / 2)
14144       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0),
14145                          N1);
14146   }
14147 
14148   return SDValue();
14149 }
14150 
14151 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
14152   SDValue N0 = N->getOperand(0);
14153 
14154   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
14155   if (N0->getOpcode() == ISD::FP16_TO_FP)
14156     return N0->getOperand(0);
14157 
14158   return SDValue();
14159 }
14160 
14161 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
14162   SDValue N0 = N->getOperand(0);
14163 
14164   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
14165   if (N0->getOpcode() == ISD::AND) {
14166     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
14167     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
14168       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
14169                          N0.getOperand(0));
14170     }
14171   }
14172 
14173   return SDValue();
14174 }
14175 
14176 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
14177 /// with the destination vector and a zero vector.
14178 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
14179 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
14180 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
14181   EVT VT = N->getValueType(0);
14182   SDValue LHS = N->getOperand(0);
14183   SDValue RHS = N->getOperand(1);
14184   SDLoc DL(N);
14185 
14186   // Make sure we're not running after operation legalization where it
14187   // may have custom lowered the vector shuffles.
14188   if (LegalOperations)
14189     return SDValue();
14190 
14191   if (N->getOpcode() != ISD::AND)
14192     return SDValue();
14193 
14194   if (RHS.getOpcode() == ISD::BITCAST)
14195     RHS = RHS.getOperand(0);
14196 
14197   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
14198     return SDValue();
14199 
14200   EVT RVT = RHS.getValueType();
14201   unsigned NumElts = RHS.getNumOperands();
14202 
14203   // Attempt to create a valid clear mask, splitting the mask into
14204   // sub elements and checking to see if each is
14205   // all zeros or all ones - suitable for shuffle masking.
14206   auto BuildClearMask = [&](int Split) {
14207     int NumSubElts = NumElts * Split;
14208     int NumSubBits = RVT.getScalarSizeInBits() / Split;
14209 
14210     SmallVector<int, 8> Indices;
14211     for (int i = 0; i != NumSubElts; ++i) {
14212       int EltIdx = i / Split;
14213       int SubIdx = i % Split;
14214       SDValue Elt = RHS.getOperand(EltIdx);
14215       if (Elt.isUndef()) {
14216         Indices.push_back(-1);
14217         continue;
14218       }
14219 
14220       APInt Bits;
14221       if (isa<ConstantSDNode>(Elt))
14222         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
14223       else if (isa<ConstantFPSDNode>(Elt))
14224         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
14225       else
14226         return SDValue();
14227 
14228       // Extract the sub element from the constant bit mask.
14229       if (DAG.getDataLayout().isBigEndian()) {
14230         Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
14231       } else {
14232         Bits = Bits.lshr(SubIdx * NumSubBits);
14233       }
14234 
14235       if (Split > 1)
14236         Bits = Bits.trunc(NumSubBits);
14237 
14238       if (Bits.isAllOnesValue())
14239         Indices.push_back(i);
14240       else if (Bits == 0)
14241         Indices.push_back(i + NumSubElts);
14242       else
14243         return SDValue();
14244     }
14245 
14246     // Let's see if the target supports this vector_shuffle.
14247     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
14248     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
14249     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
14250       return SDValue();
14251 
14252     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
14253     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
14254                                                    DAG.getBitcast(ClearVT, LHS),
14255                                                    Zero, Indices));
14256   };
14257 
14258   // Determine maximum split level (byte level masking).
14259   int MaxSplit = 1;
14260   if (RVT.getScalarSizeInBits() % 8 == 0)
14261     MaxSplit = RVT.getScalarSizeInBits() / 8;
14262 
14263   for (int Split = 1; Split <= MaxSplit; ++Split)
14264     if (RVT.getScalarSizeInBits() % Split == 0)
14265       if (SDValue S = BuildClearMask(Split))
14266         return S;
14267 
14268   return SDValue();
14269 }
14270 
14271 /// Visit a binary vector operation, like ADD.
14272 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
14273   assert(N->getValueType(0).isVector() &&
14274          "SimplifyVBinOp only works on vectors!");
14275 
14276   SDValue LHS = N->getOperand(0);
14277   SDValue RHS = N->getOperand(1);
14278   SDValue Ops[] = {LHS, RHS};
14279 
14280   // See if we can constant fold the vector operation.
14281   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
14282           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
14283     return Fold;
14284 
14285   // Try to convert a constant mask AND into a shuffle clear mask.
14286   if (SDValue Shuffle = XformToShuffleWithZero(N))
14287     return Shuffle;
14288 
14289   // Type legalization might introduce new shuffles in the DAG.
14290   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
14291   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
14292   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
14293       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
14294       LHS.getOperand(1).isUndef() &&
14295       RHS.getOperand(1).isUndef()) {
14296     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
14297     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
14298 
14299     if (SVN0->getMask().equals(SVN1->getMask())) {
14300       EVT VT = N->getValueType(0);
14301       SDValue UndefVector = LHS.getOperand(1);
14302       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
14303                                      LHS.getOperand(0), RHS.getOperand(0),
14304                                      N->getFlags());
14305       AddUsersToWorklist(N);
14306       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
14307                                   SVN0->getMask());
14308     }
14309   }
14310 
14311   return SDValue();
14312 }
14313 
14314 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
14315                                     SDValue N2) {
14316   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
14317 
14318   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
14319                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
14320 
14321   // If we got a simplified select_cc node back from SimplifySelectCC, then
14322   // break it down into a new SETCC node, and a new SELECT node, and then return
14323   // the SELECT node, since we were called with a SELECT node.
14324   if (SCC.getNode()) {
14325     // Check to see if we got a select_cc back (to turn into setcc/select).
14326     // Otherwise, just return whatever node we got back, like fabs.
14327     if (SCC.getOpcode() == ISD::SELECT_CC) {
14328       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
14329                                   N0.getValueType(),
14330                                   SCC.getOperand(0), SCC.getOperand(1),
14331                                   SCC.getOperand(4));
14332       AddToWorklist(SETCC.getNode());
14333       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
14334                            SCC.getOperand(2), SCC.getOperand(3));
14335     }
14336 
14337     return SCC;
14338   }
14339   return SDValue();
14340 }
14341 
14342 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
14343 /// being selected between, see if we can simplify the select.  Callers of this
14344 /// should assume that TheSelect is deleted if this returns true.  As such, they
14345 /// should return the appropriate thing (e.g. the node) back to the top-level of
14346 /// the DAG combiner loop to avoid it being looked at.
14347 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
14348                                     SDValue RHS) {
14349 
14350   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
14351   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
14352   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
14353     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
14354       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
14355       SDValue Sqrt = RHS;
14356       ISD::CondCode CC;
14357       SDValue CmpLHS;
14358       const ConstantFPSDNode *Zero = nullptr;
14359 
14360       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
14361         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
14362         CmpLHS = TheSelect->getOperand(0);
14363         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
14364       } else {
14365         // SELECT or VSELECT
14366         SDValue Cmp = TheSelect->getOperand(0);
14367         if (Cmp.getOpcode() == ISD::SETCC) {
14368           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
14369           CmpLHS = Cmp.getOperand(0);
14370           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
14371         }
14372       }
14373       if (Zero && Zero->isZero() &&
14374           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
14375           CC == ISD::SETULT || CC == ISD::SETLT)) {
14376         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
14377         CombineTo(TheSelect, Sqrt);
14378         return true;
14379       }
14380     }
14381   }
14382   // Cannot simplify select with vector condition
14383   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
14384 
14385   // If this is a select from two identical things, try to pull the operation
14386   // through the select.
14387   if (LHS.getOpcode() != RHS.getOpcode() ||
14388       !LHS.hasOneUse() || !RHS.hasOneUse())
14389     return false;
14390 
14391   // If this is a load and the token chain is identical, replace the select
14392   // of two loads with a load through a select of the address to load from.
14393   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
14394   // constants have been dropped into the constant pool.
14395   if (LHS.getOpcode() == ISD::LOAD) {
14396     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
14397     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
14398 
14399     // Token chains must be identical.
14400     if (LHS.getOperand(0) != RHS.getOperand(0) ||
14401         // Do not let this transformation reduce the number of volatile loads.
14402         LLD->isVolatile() || RLD->isVolatile() ||
14403         // FIXME: If either is a pre/post inc/dec load,
14404         // we'd need to split out the address adjustment.
14405         LLD->isIndexed() || RLD->isIndexed() ||
14406         // If this is an EXTLOAD, the VT's must match.
14407         LLD->getMemoryVT() != RLD->getMemoryVT() ||
14408         // If this is an EXTLOAD, the kind of extension must match.
14409         (LLD->getExtensionType() != RLD->getExtensionType() &&
14410          // The only exception is if one of the extensions is anyext.
14411          LLD->getExtensionType() != ISD::EXTLOAD &&
14412          RLD->getExtensionType() != ISD::EXTLOAD) ||
14413         // FIXME: this discards src value information.  This is
14414         // over-conservative. It would be beneficial to be able to remember
14415         // both potential memory locations.  Since we are discarding
14416         // src value info, don't do the transformation if the memory
14417         // locations are not in the default address space.
14418         LLD->getPointerInfo().getAddrSpace() != 0 ||
14419         RLD->getPointerInfo().getAddrSpace() != 0 ||
14420         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
14421                                       LLD->getBasePtr().getValueType()))
14422       return false;
14423 
14424     // Check that the select condition doesn't reach either load.  If so,
14425     // folding this will induce a cycle into the DAG.  If not, this is safe to
14426     // xform, so create a select of the addresses.
14427     SDValue Addr;
14428     if (TheSelect->getOpcode() == ISD::SELECT) {
14429       SDNode *CondNode = TheSelect->getOperand(0).getNode();
14430       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
14431           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
14432         return false;
14433       // The loads must not depend on one another.
14434       if (LLD->isPredecessorOf(RLD) ||
14435           RLD->isPredecessorOf(LLD))
14436         return false;
14437       Addr = DAG.getSelect(SDLoc(TheSelect),
14438                            LLD->getBasePtr().getValueType(),
14439                            TheSelect->getOperand(0), LLD->getBasePtr(),
14440                            RLD->getBasePtr());
14441     } else {  // Otherwise SELECT_CC
14442       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
14443       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
14444 
14445       if ((LLD->hasAnyUseOfValue(1) &&
14446            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
14447           (RLD->hasAnyUseOfValue(1) &&
14448            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
14449         return false;
14450 
14451       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
14452                          LLD->getBasePtr().getValueType(),
14453                          TheSelect->getOperand(0),
14454                          TheSelect->getOperand(1),
14455                          LLD->getBasePtr(), RLD->getBasePtr(),
14456                          TheSelect->getOperand(4));
14457     }
14458 
14459     SDValue Load;
14460     // It is safe to replace the two loads if they have different alignments,
14461     // but the new load must be the minimum (most restrictive) alignment of the
14462     // inputs.
14463     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
14464     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
14465     if (!RLD->isInvariant())
14466       MMOFlags &= ~MachineMemOperand::MOInvariant;
14467     if (!RLD->isDereferenceable())
14468       MMOFlags &= ~MachineMemOperand::MODereferenceable;
14469     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
14470       // FIXME: Discards pointer and AA info.
14471       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
14472                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
14473                          MMOFlags);
14474     } else {
14475       // FIXME: Discards pointer and AA info.
14476       Load = DAG.getExtLoad(
14477           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
14478                                                   : LLD->getExtensionType(),
14479           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
14480           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
14481     }
14482 
14483     // Users of the select now use the result of the load.
14484     CombineTo(TheSelect, Load);
14485 
14486     // Users of the old loads now use the new load's chain.  We know the
14487     // old-load value is dead now.
14488     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
14489     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
14490     return true;
14491   }
14492 
14493   return false;
14494 }
14495 
14496 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
14497 /// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // Operand roles: the select being simplified is "(N0 CC N1) ? N2 : N3".
  // Each fold below either returns the replacement value or falls through to
  // the next one; an empty SDValue is returned when nothing applies.

  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  // VT is the type of the selected values (N2's type), not of the compared
  // operands N0/N1.
  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());

  // Determine if the condition we're dealing with is constant.  The setcc is
  // simplified with foldBooleans == false; any node that comes back is queued
  // for another combine visit.
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorklist(SCC.getNode());

  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    // fold select_cc true, x, y -> x
    // fold select_cc false, x, y -> y
    return !SCCC->isNullValue() ? N2 : N3;
  }

  // Check to see if we can simplify the select into an fabs node
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS, DL, VT, N3);
    }
  }

  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 4 : 0))"
  // where "tmp" is a constant pool entry containing an array with 2.0 and 1.0
  // in it (false value first).  This is a win when the constant is not
  // otherwise available because it replaces two constant pool loads with one.
  // We only do this if the FP type is known to be legal, because if it isn't,
  // then we are before legalize types and we want the other legalization to
  // happen first (e.g. to avoid messing with soft float) and if the ConstantFP
  // is not legal, because if it is legal, we may not need to store the FP
  // constant in a constant pool.
  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
      if (TLI.isTypeLegal(N2.getValueType()) &&
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
               TargetLowering::Legal &&
           !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
           !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
          // If both constants have multiple uses, then we won't need to do an
          // extra load, they are likely around in registers for other users.
          (TV->hasOneUse() || FV->hasOneUse())) {
        // Element 0 is the false value and element 1 the true value, matching
        // the offset select built below.
        Constant *Elts[] = {
          const_cast<ConstantFP*>(FV->getConstantFPValue()),
          const_cast<ConstantFP*>(TV->getConstantFPValue())
        };
        Type *FPTy = Elts[0]->getType();
        const DataLayout &TD = DAG.getDataLayout();

        // Create a ConstantArray of the two constants.
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
        SDValue CPIdx =
            DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                TD.getPrefTypeAlignment(FPTy));
        // Use the alignment recorded on the constant pool node for the load.
        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
        SDValue Zero = DAG.getIntPtrConstant(0, DL);
        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
        SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));

        SDValue Cond = DAG.getSetCC(DL,
                                    getSetCCResultType(N0.getValueType()),
                                    N0, N1, CC);
        AddToWorklist(Cond.getNode());
        // True condition -> byte offset EltSize (the TV slot); false -> 0 (FV).
        SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
                                          Cond, One, Zero);
        AddToWorklist(CstOffset.getNode());
        CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
                            CstOffset);
        AddToWorklist(CPIdx.getNode());
        return DAG.getLoad(
            TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
            MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
            Alignment);
      }
    }

  // Check to see if we can perform the "gzip trick", transforming
  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
  if (isNullConstant(N3) && CC == ISD::SETLT &&
      (isNullConstant(N1) ||                 // (a < 0) ? b : 0
       (isOneConstant(N1) && N0 == N2))) {   // (a < 1) ? a : 0
    EVT XType = N0.getValueType();
    EVT AType = N2.getValueType();
    if (XType.bitsGE(AType)) {
      // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a
      // single-bit constant.  The test below accepts any constant with at
      // most one bit set.
      if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
        // Shift amount that moves X's top bit down to A's set bit position.
        unsigned ShCtV = N2C->getAPIntValue().logBase2();
        ShCtV = XType.getSizeInBits() - ShCtV - 1;
        SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
                                       getShiftAmountTy(N0.getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
                                    XType, N0, ShCt);
        AddToWorklist(Shift.getNode());

        // Narrow the shifted value when X is wider than the result type.
        if (XType.bitsGT(AType)) {
          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
          AddToWorklist(Shift.getNode());
        }

        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
      }

      // General case: shift right arithmetically by size(X)-1 and use the
      // result as a mask for A.
      SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
                                  XType, N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1,
                                                  SDLoc(N0),
                                         getShiftAmountTy(N0.getValueType())));
      AddToWorklist(Shift.getNode());

      if (XType.bitsGT(AType)) {
        Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
        AddToWorklist(Shift.getNode());
      }

      return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
    }
  }

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
        DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                        getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is either
      // all-ones, or zero.
      SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                        getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  // Only done when the target's booleans for N0's type are zero-or-one.
  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType()) ==
          TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.  Note that this returns immediately, so the folds further
    // down are not attempted in that case.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    // Get a SetCC of the condition
    // NOTE: Don't create a SETCC if it's not legal on this target.
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
      // This SCC intentionally shadows the outer SCC computed above.
      SDValue Temp, SCC;
      // cast from setcc result type to select result type
      if (LegalTypes) {
        SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
                            N0, N1, CC);
        if (N2.getValueType().bitsLT(SCC.getValueType()))
          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
                                        N2.getValueType());
        else
          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                             N2.getValueType(), SCC);
      } else {
        // Before type legalization an i1 setcc can be created directly.
        SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                           N2.getValueType(), SCC);
      }

      AddToWorklist(SCC.getNode());
      AddToWorklist(Temp.getNode());

      // select C, 1, 0 is just the extended compare; no shift needed.
      if (N2C->isOne())
        return Temp;

      // shl setcc result by log2 n2c
      return DAG.getNode(
          ISD::SHL, DL, N2.getValueType(), Temp,
          DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
                          getShiftAmountTy(Temp.getValueType())));
    }
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C) {
    // SubC becomes the left operand of the (sub C, X) arm when that operand
    // is a constant; the fold requires it to be zero, i.e. the arm is -X.
    ConstantSDNode *SubC = nullptr;
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    EVT XType = N0.getValueType();
    if (SubC && SubC->isNullValue() && XType.isInteger()) {
      // This local DL (taken from N0) shadows the DL parameter for the nodes
      // created in this fold.
      SDLoc DL(N0);
      SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
                                  N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1, DL,
                                         getShiftAmountTy(N0.getValueType())));
      SDValue Add = DAG.getNode(ISD::ADD, DL,
                                XType, N0, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
    }
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  // (Here "sizeof(X)" means the bit width, as checked against
  // VT.getSizeInBits() below.)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  // No fold applied.
  return SDValue();
}
14778 
14779 /// This is a stub for TargetLowering::SimplifySetCC.
14780 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
14781                                    ISD::CondCode Cond, const SDLoc &DL,
14782                                    bool foldBooleans) {
14783   TargetLowering::DAGCombinerInfo
14784     DagCombineInfo(DAG, Level, false, this);
14785   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
14786 }
14787 
14788 /// Given an ISD::SDIV node expressing a divide by constant, return
14789 /// a DAG expression to select that will generate the same value by multiplying
14790 /// by a magic number.
14791 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
14792 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
14793   // when optimising for minimum size, we don't want to expand a div to a mul
14794   // and a shift.
14795   if (DAG.getMachineFunction().getFunction()->optForMinSize())
14796     return SDValue();
14797 
14798   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14799   if (!C)
14800     return SDValue();
14801 
14802   // Avoid division by zero.
14803   if (C->isNullValue())
14804     return SDValue();
14805 
14806   std::vector<SDNode*> Built;
14807   SDValue S =
14808       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
14809 
14810   for (SDNode *N : Built)
14811     AddToWorklist(N);
14812   return S;
14813 }
14814 
14815 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
14816 /// DAG expression that will generate the same value by right shifting.
14817 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
14818   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14819   if (!C)
14820     return SDValue();
14821 
14822   // Avoid division by zero.
14823   if (C->isNullValue())
14824     return SDValue();
14825 
14826   std::vector<SDNode *> Built;
14827   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
14828 
14829   for (SDNode *N : Built)
14830     AddToWorklist(N);
14831   return S;
14832 }
14833 
14834 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
14835 /// expression that will generate the same value by multiplying by a magic
14836 /// number.
14837 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
14838 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
14839   // when optimising for minimum size, we don't want to expand a div to a mul
14840   // and a shift.
14841   if (DAG.getMachineFunction().getFunction()->optForMinSize())
14842     return SDValue();
14843 
14844   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14845   if (!C)
14846     return SDValue();
14847 
14848   // Avoid division by zero.
14849   if (C->isNullValue())
14850     return SDValue();
14851 
14852   std::vector<SDNode*> Built;
14853   SDValue S =
14854       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
14855 
14856   for (SDNode *N : Built)
14857     AddToWorklist(N);
14858   return S;
14859 }
14860 
14861 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
14862   if (Level >= AfterLegalizeDAG)
14863     return SDValue();
14864 
14865   // TODO: Handle half and/or extended types?
14866   EVT VT = Op.getValueType();
14867   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
14868     return SDValue();
14869 
14870   // If estimates are explicitly disabled for this function, we're done.
14871   MachineFunction &MF = DAG.getMachineFunction();
14872   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
14873   if (Enabled == TLI.ReciprocalEstimate::Disabled)
14874     return SDValue();
14875 
14876   // Estimates may be explicitly enabled for this type with a custom number of
14877   // refinement steps.
14878   int Iterations = TLI.getDivRefinementSteps(VT, MF);
14879   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
14880     if (Iterations) {
14881       // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
14882       // For the reciprocal, we need to find the zero of the function:
14883       //   F(X) = A X - 1 [which has a zero at X = 1/A]
14884       //     =>
14885       //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
14886       //     does not require additional intermediate precision]
14887       EVT VT = Op.getValueType();
14888       SDLoc DL(Op);
14889       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14890 
14891       AddToWorklist(Est.getNode());
14892 
14893       // Newton iterations: Est = Est + Est (1 - Arg * Est)
14894       for (int i = 0; i < Iterations; ++i) {
14895         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
14896         AddToWorklist(NewEst.getNode());
14897 
14898         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
14899         AddToWorklist(NewEst.getNode());
14900 
14901         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
14902         AddToWorklist(NewEst.getNode());
14903 
14904         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
14905         AddToWorklist(Est.getNode());
14906       }
14907     }
14908     return Est;
14909   }
14910 
14911   return SDValue();
14912 }
14913 
14914 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
14915 /// For the reciprocal sqrt, we need to find the zero of the function:
14916 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
14917 ///     =>
14918 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
14919 /// As a result, we precompute A/2 prior to the iteration loop.
14920 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
14921                                          unsigned Iterations,
14922                                          SDNodeFlags *Flags, bool Reciprocal) {
14923   EVT VT = Arg.getValueType();
14924   SDLoc DL(Arg);
14925   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
14926 
14927   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
14928   // this entire sequence requires only one FP constant.
14929   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
14930   AddToWorklist(HalfArg.getNode());
14931 
14932   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
14933   AddToWorklist(HalfArg.getNode());
14934 
14935   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
14936   for (unsigned i = 0; i < Iterations; ++i) {
14937     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
14938     AddToWorklist(NewEst.getNode());
14939 
14940     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
14941     AddToWorklist(NewEst.getNode());
14942 
14943     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
14944     AddToWorklist(NewEst.getNode());
14945 
14946     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
14947     AddToWorklist(Est.getNode());
14948   }
14949 
14950   // If non-reciprocal square root is requested, multiply the result by Arg.
14951   if (!Reciprocal) {
14952     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
14953     AddToWorklist(Est.getNode());
14954   }
14955 
14956   return Est;
14957 }
14958 
14959 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
14960 /// For the reciprocal sqrt, we need to find the zero of the function:
14961 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
14962 ///     =>
14963 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
14964 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
14965                                          unsigned Iterations,
14966                                          SDNodeFlags *Flags, bool Reciprocal) {
14967   EVT VT = Arg.getValueType();
14968   SDLoc DL(Arg);
14969   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
14970   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
14971 
14972   // This routine must enter the loop below to work correctly
14973   // when (Reciprocal == false).
14974   assert(Iterations > 0);
14975 
14976   // Newton iterations for reciprocal square root:
14977   // E = (E * -0.5) * ((A * E) * E + -3.0)
14978   for (unsigned i = 0; i < Iterations; ++i) {
14979     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
14980     AddToWorklist(AE.getNode());
14981 
14982     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
14983     AddToWorklist(AEE.getNode());
14984 
14985     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
14986     AddToWorklist(RHS.getNode());
14987 
14988     // When calculating a square root at the last iteration build:
14989     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
14990     // (notice a common subexpression)
14991     SDValue LHS;
14992     if (Reciprocal || (i + 1) < Iterations) {
14993       // RSQRT: LHS = (E * -0.5)
14994       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
14995     } else {
14996       // SQRT: LHS = (A * E) * -0.5
14997       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
14998     }
14999     AddToWorklist(LHS.getNode());
15000 
15001     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
15002     AddToWorklist(Est.getNode());
15003   }
15004 
15005   return Est;
15006 }
15007 
15008 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
15009 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
15010 /// Op can be zero.
15011 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
15012                                            bool Reciprocal) {
15013   if (Level >= AfterLegalizeDAG)
15014     return SDValue();
15015 
15016   // TODO: Handle half and/or extended types?
15017   EVT VT = Op.getValueType();
15018   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
15019     return SDValue();
15020 
15021   // If estimates are explicitly disabled for this function, we're done.
15022   MachineFunction &MF = DAG.getMachineFunction();
15023   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
15024   if (Enabled == TLI.ReciprocalEstimate::Disabled)
15025     return SDValue();
15026 
15027   // Estimates may be explicitly enabled for this type with a custom number of
15028   // refinement steps.
15029   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
15030 
15031   bool UseOneConstNR = false;
15032   if (SDValue Est =
15033       TLI.getRsqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR)) {
15034     AddToWorklist(Est.getNode());
15035     if (Iterations) {
15036       Est = UseOneConstNR
15037       ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
15038       : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
15039     }
15040     return Est;
15041   }
15042 
15043   return SDValue();
15044 }
15045 
15046 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
15047   return buildSqrtEstimateImpl(Op, Flags, true);
15048 }
15049 
15050 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
15051   SDValue Est = buildSqrtEstimateImpl(Op, Flags, false);
15052   if (!Est)
15053     return SDValue();
15054 
15055   // Unfortunately, Est is now NaN if the input was exactly 0.
15056   // Select out this case and force the answer to 0.
15057   EVT VT = Est.getValueType();
15058   SDLoc DL(Op);
15059   SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
15060   EVT CCVT = getSetCCResultType(VT);
15061   SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ);
15062   AddToWorklist(ZeroCmp.getNode());
15063 
15064   Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp,
15065                     Zero, Est);
15066   AddToWorklist(Est.getNode());
15067   return Est;
15068 }
15069 
15070 /// Return true if base is a frame index, which is known not to alias with
15071 /// anything but itself.  Provides base object and offset as results.
15072 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
15073                            const GlobalValue *&GV, const void *&CV) {
15074   // Assume it is a primitive operation.
15075   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
15076 
15077   // If it's an adding a simple constant then integrate the offset.
15078   if (Base.getOpcode() == ISD::ADD) {
15079     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
15080       Base = Base.getOperand(0);
15081       Offset += C->getZExtValue();
15082     }
15083   }
15084 
15085   // Return the underlying GlobalValue, and update the Offset.  Return false
15086   // for GlobalAddressSDNode since the same GlobalAddress may be represented
15087   // by multiple nodes with different offsets.
15088   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
15089     GV = G->getGlobal();
15090     Offset += G->getOffset();
15091     return false;
15092   }
15093 
15094   // Return the underlying Constant value, and update the Offset.  Return false
15095   // for ConstantSDNodes since the same constant pool entry may be represented
15096   // by multiple nodes with different offsets.
15097   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
15098     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
15099                                          : (const void *)C->getConstVal();
15100     Offset += C->getOffset();
15101     return false;
15102   }
15103   // If it's any of the following then it can't alias with anything but itself.
15104   return isa<FrameIndexSDNode>(Base);
15105 }
15106 
15107 /// Return true if there is any possibility that the two addresses overlap.
15108 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
15109   // If they are the same then they must be aliases.
15110   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
15111 
15112   // If they are both volatile then they cannot be reordered.
15113   if (Op0->isVolatile() && Op1->isVolatile()) return true;
15114 
15115   // If one operation reads from invariant memory, and the other may store, they
15116   // cannot alias. These should really be checking the equivalent of mayWrite,
15117   // but it only matters for memory nodes other than load /store.
15118   if (Op0->isInvariant() && Op1->writeMem())
15119     return false;
15120 
15121   if (Op1->isInvariant() && Op0->writeMem())
15122     return false;
15123 
15124   // Gather base node and offset information.
15125   SDValue Base1, Base2;
15126   int64_t Offset1, Offset2;
15127   const GlobalValue *GV1, *GV2;
15128   const void *CV1, *CV2;
15129   bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
15130                                       Base1, Offset1, GV1, CV1);
15131   bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
15132                                       Base2, Offset2, GV2, CV2);
15133 
15134   // If they have a same base address then check to see if they overlap.
15135   if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
15136     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
15137              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
15138 
15139   // It is possible for different frame indices to alias each other, mostly
15140   // when tail call optimization reuses return address slots for arguments.
15141   // To catch this case, look up the actual index of frame indices to compute
15142   // the real alias relationship.
15143   if (isFrameIndex1 && isFrameIndex2) {
15144     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
15145     Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
15146     Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
15147     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
15148              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
15149   }
15150 
15151   // Otherwise, if we know what the bases are, and they aren't identical, then
15152   // we know they cannot alias.
15153   if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
15154     return false;
15155 
15156   // If we know required SrcValue1 and SrcValue2 have relatively large alignment
15157   // compared to the size and offset of the access, we may be able to prove they
15158   // do not alias.  This check is conservative for now to catch cases created by
15159   // splitting vector types.
15160   if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
15161       (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
15162       (Op0->getMemoryVT().getSizeInBits() >> 3 ==
15163        Op1->getMemoryVT().getSizeInBits() >> 3) &&
15164       (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
15165     int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
15166     int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
15167 
15168     // There is no overlap between these relatively aligned accesses of similar
15169     // size, return no alias.
15170     if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
15171         (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
15172       return false;
15173   }
15174 
15175   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
15176                    ? CombinerGlobalAA
15177                    : DAG.getSubtarget().useAA();
15178 #ifndef NDEBUG
15179   if (CombinerAAOnlyFunc.getNumOccurrences() &&
15180       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
15181     UseAA = false;
15182 #endif
15183   if (UseAA &&
15184       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
15185     // Use alias analysis information.
15186     int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
15187                                  Op1->getSrcValueOffset());
15188     int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
15189         Op0->getSrcValueOffset() - MinOffset;
15190     int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
15191         Op1->getSrcValueOffset() - MinOffset;
15192     AliasResult AAResult =
15193         AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
15194                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
15195                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
15196                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
15197     if (AAResult == NoAlias)
15198       return false;
15199   }
15200 
15201   // Otherwise we have to assume they alias.
15202   return true;
15203 }
15204 
15205 /// Walk up chain skipping non-aliasing memory nodes,
15206 /// looking for aliasing nodes and adding them to the Aliases vector.
15207 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
15208                                    SmallVectorImpl<SDValue> &Aliases) {
15209   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
15210   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
15211 
15212   // Get alias information for node.
15213   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
15214 
15215   // Starting off.
15216   Chains.push_back(OriginalChain);
15217   unsigned Depth = 0;
15218 
15219   // Look at each chain and determine if it is an alias.  If so, add it to the
15220   // aliases list.  If not, then continue up the chain looking for the next
15221   // candidate.
15222   while (!Chains.empty()) {
15223     SDValue Chain = Chains.pop_back_val();
15224 
15225     // For TokenFactor nodes, look at each operand and only continue up the
15226     // chain until we reach the depth limit.
15227     //
15228     // FIXME: The depth check could be made to return the last non-aliasing
15229     // chain we found before we hit a tokenfactor rather than the original
15230     // chain.
15231     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
15232       Aliases.clear();
15233       Aliases.push_back(OriginalChain);
15234       return;
15235     }
15236 
15237     // Don't bother if we've been before.
15238     if (!Visited.insert(Chain.getNode()).second)
15239       continue;
15240 
15241     switch (Chain.getOpcode()) {
15242     case ISD::EntryToken:
15243       // Entry token is ideal chain operand, but handled in FindBetterChain.
15244       break;
15245 
15246     case ISD::LOAD:
15247     case ISD::STORE: {
15248       // Get alias information for Chain.
15249       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
15250           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
15251 
15252       // If chain is alias then stop here.
15253       if (!(IsLoad && IsOpLoad) &&
15254           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
15255         Aliases.push_back(Chain);
15256       } else {
15257         // Look further up the chain.
15258         Chains.push_back(Chain.getOperand(0));
15259         ++Depth;
15260       }
15261       break;
15262     }
15263 
15264     case ISD::TokenFactor:
15265       // We have to check each of the operands of the token factor for "small"
15266       // token factors, so we queue them up.  Adding the operands to the queue
15267       // (stack) in reverse order maintains the original order and increases the
15268       // likelihood that getNode will find a matching token factor (CSE.)
15269       if (Chain.getNumOperands() > 16) {
15270         Aliases.push_back(Chain);
15271         break;
15272       }
15273       for (unsigned n = Chain.getNumOperands(); n;)
15274         Chains.push_back(Chain.getOperand(--n));
15275       ++Depth;
15276       break;
15277 
15278     default:
15279       // For all other instructions we will just have to take what we can get.
15280       Aliases.push_back(Chain);
15281       break;
15282     }
15283   }
15284 }
15285 
15286 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
15287 /// (aliasing node.)
15288 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
15289   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
15290 
15291   // Accumulate all the aliases to this node.
15292   GatherAllAliases(N, OldChain, Aliases);
15293 
15294   // If no operands then chain to entry token.
15295   if (Aliases.size() == 0)
15296     return DAG.getEntryNode();
15297 
15298   // If a single operand then chain to it.  We don't need to revisit it.
15299   if (Aliases.size() == 1)
15300     return Aliases[0];
15301 
15302   // Construct a custom tailored token factor.
15303   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
15304 }
15305 
15306 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
15307   // This holds the base pointer, index, and the offset in bytes from the base
15308   // pointer.
15309   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
15310 
15311   // We must have a base and an offset.
15312   if (!BasePtr.Base.getNode())
15313     return false;
15314 
15315   // Do not handle stores to undef base pointers.
15316   if (BasePtr.Base.isUndef())
15317     return false;
15318 
15319   SmallVector<StoreSDNode *, 8> ChainedStores;
15320   ChainedStores.push_back(St);
15321 
15322   // Walk up the chain and look for nodes with offsets from the same
15323   // base pointer. Stop when reaching an instruction with a different kind
15324   // or instruction which has a different base pointer.
15325   StoreSDNode *Index = St;
15326   while (Index) {
15327     // If the chain has more than one use, then we can't reorder the mem ops.
15328     if (Index != St && !SDValue(Index, 0)->hasOneUse())
15329       break;
15330 
15331     if (Index->isVolatile() || Index->isIndexed())
15332       break;
15333 
15334     // Find the base pointer and offset for this memory node.
15335     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
15336 
15337     // Check that the base pointer is the same as the original one.
15338     if (!Ptr.equalBaseIndex(BasePtr))
15339       break;
15340 
15341     // Find the next memory operand in the chain. If the next operand in the
15342     // chain is a store then move up and continue the scan with the next
15343     // memory operand. If the next operand is a load save it and use alias
15344     // information to check if it interferes with anything.
15345     SDNode *NextInChain = Index->getChain().getNode();
15346     while (true) {
15347       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
15348         // We found a store node. Use it for the next iteration.
15349         if (STn->isVolatile() || STn->isIndexed()) {
15350           Index = nullptr;
15351           break;
15352         }
15353         ChainedStores.push_back(STn);
15354         Index = STn;
15355         break;
15356       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
15357         NextInChain = Ldn->getChain().getNode();
15358         continue;
15359       } else {
15360         Index = nullptr;
15361         break;
15362       }
15363     }
15364   }
15365 
15366   bool MadeChangeToSt = false;
15367   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
15368 
15369   for (StoreSDNode *ChainedStore : ChainedStores) {
15370     SDValue Chain = ChainedStore->getChain();
15371     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
15372 
15373     if (Chain != BetterChain) {
15374       if (ChainedStore == St)
15375         MadeChangeToSt = true;
15376       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
15377     }
15378   }
15379 
15380   // Do all replacements after finding the replacements to make to avoid making
15381   // the chains more complicated by introducing new TokenFactors.
15382   for (auto Replacement : BetterChains)
15383     replaceStoreChain(Replacement.first, Replacement.second);
15384 
15385   return MadeChangeToSt;
15386 }
15387 
15388 /// This is the entry point for the file.
15389 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
15390                            CodeGenOpt::Level OptLevel) {
15391   /// This is the main entry point to this class.
15392   DAGCombiner(*this, AA, OptLevel).Run(Level);
15393 }
15394